diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-24 17:02:14 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-03-24 17:02:14 (GMT) |
commit | 3fa2fe2ce09c5a16be69c5319eb3347271a99735 (patch) | |
tree | 7067760e7006dec7ac6fe18fe3d09851293b63ea | |
parent | d88f48e12821ab4b2244124d50ac094568f48db5 (diff) | |
parent | 05f5ece76a88a2cd4859bc93f90379733dd8b4a3 (diff) | |
download | linux-3fa2fe2ce09c5a16be69c5319eb3347271a99735.tar.xz |
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Ingo Molnar:
"This tree contains various perf fixes on the kernel side, plus three
hw/event-enablement late additions:
- Intel Memory Bandwidth Monitoring events and handling
- the AMD Accumulated Power Mechanism reporting facility
- more IOMMU events
... and a final round of perf tooling updates/fixes"
* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
perf llvm: Use strerror_r instead of the thread unsafe strerror one
perf llvm: Use realpath to canonicalize paths
perf tools: Unexport some methods unused outside strbuf.c
perf probe: No need to use formatting strbuf method
perf help: Use asprintf instead of adhoc equivalents
perf tools: Remove unused perf_pathdup, xstrdup functions
perf tools: Do not include stringify.h from the kernel sources
tools include: Copy linux/stringify.h from the kernel
tools lib traceevent: Remove redundant CPU output
perf tools: Remove needless 'extern' from function prototypes
perf tools: Simplify die() mechanism
perf tools: Remove unused DIE_IF macro
perf script: Remove lots of unused arguments
perf thread: Rename perf_event__preprocess_sample_addr to thread__resolve
perf machine: Rename perf_event__preprocess_sample to machine__resolve
perf tools: Add cpumode to struct perf_sample
perf tests: Forward the perf_sample in the dwarf unwind test
perf tools: Remove misplaced __maybe_unused
perf list: Fix documentation of :ppp
perf bench numa: Fix assertion for nodes bitfield
...
95 files changed, 1336 insertions, 634 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 54478b7..2dc18605 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1210,6 +1210,15 @@ config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE +config PERF_EVENTS_AMD_POWER + depends on PERF_EVENTS && CPU_SUP_AMD + tristate "AMD Processor Power Reporting Mechanism" + ---help--- + Provide power reporting mechanism support for AMD processors. + Currently, it leverages X86_FEATURE_ACC_POWER + (CPUID Fn8000_0007_EDX[12]) interface to calculate the + average power consumption on Family 15h processors. + config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" ---help--- diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index fdfea15..f59618a 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,6 +1,7 @@ obj-y += core.o obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o +obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += amd/power.o obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o ifdef CONFIG_AMD_IOMMU obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 51087c2..3ea25c3 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -376,7 +376,13 @@ static void perf_ibs_start(struct perf_event *event, int flags) hwc->state = 0; perf_ibs_set_period(perf_ibs, hwc, &period); + /* + * Set STARTED before enabling the hardware, such that + * a subsequent NMI must observe it. Then clear STOPPING + * such that we don't consume NMIs by accident. + */ set_bit(IBS_STARTED, pcpu->state); + clear_bit(IBS_STOPPING, pcpu->state); perf_ibs_enable_event(perf_ibs, hwc, period >> 4); perf_event_update_userpage(event); @@ -390,7 +396,7 @@ static void perf_ibs_stop(struct perf_event *event, int flags) u64 config; int stopping; - stopping = test_and_clear_bit(IBS_STARTED, pcpu->state); + stopping = test_bit(IBS_STARTED, pcpu->state); if (!stopping && (hwc->state & PERF_HES_UPTODATE)) return; @@ -398,8 +404,24 @@ static void perf_ibs_stop(struct perf_event *event, int flags) rdmsrl(hwc->config_base, config); if (stopping) { + /* + * Set STOPPING before disabling the hardware, such that it + * must be visible to NMIs the moment we clear the EN bit, + * at which point we can generate an !VALID sample which + * we need to consume. + */ set_bit(IBS_STOPPING, pcpu->state); perf_ibs_disable_event(perf_ibs, hwc, config); + /* + * Clear STARTED after disabling the hardware; if it were + * cleared before an NMI hitting after the clear but before + * clearing the EN bit might think it a spurious NMI and not + * handle it. + * + * Clearing it after, however, creates the problem of the NMI + * handler seeing STARTED but not having a valid sample. + */ + clear_bit(IBS_STARTED, pcpu->state); WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); hwc->state |= PERF_HES_STOPPED; } @@ -527,20 +549,24 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) u64 *buf, *config, period; if (!test_bit(IBS_STARTED, pcpu->state)) { +fail: /* * Catch spurious interrupts after stopping IBS: After * disabling IBS there could be still incoming NMIs * with samples that even have the valid bit cleared. * Mark all this NMIs as handled. */ - return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0; + if (test_and_clear_bit(IBS_STOPPING, pcpu->state)) + return 1; + + return 0; } msr = hwc->config_base; buf = ibs_data.regs; rdmsrl(msr, *buf); if (!(*buf++ & perf_ibs->valid_mask)) - return 0; + goto fail; config = &ibs_data.regs[0]; perf_ibs_event_update(perf_ibs, event, config); @@ -599,7 +625,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) throttle = perf_event_overflow(event, &data, ®s); out: if (throttle) - perf_ibs_disable_event(perf_ibs, hwc, *config); + perf_ibs_stop(event, 0); else perf_ibs_enable_event(perf_ibs, hwc, period >> 4); @@ -611,6 +637,7 @@ out: static int perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) { + u64 stamp = sched_clock(); int handled = 0; handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs); @@ -619,6 +646,8 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) if (handled) inc_irq_stat(apic_perf_irqs); + perf_sample_event_took(sched_clock() - stamp); + return handled; } NOKPROBE_SYMBOL(perf_ibs_nmi_handler); diff --git a/arch/x86/events/amd/iommu.c b/arch/x86/events/amd/iommu.c index 635e5eb..40625ca 100644 --- a/arch/x86/events/amd/iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -118,6 +118,11 @@ static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = { AMD_IOMMU_EVENT_DESC(cmd_processed, "csource=0x11"), AMD_IOMMU_EVENT_DESC(cmd_processed_inv, "csource=0x12"), AMD_IOMMU_EVENT_DESC(tlb_inv, "csource=0x13"), + AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h, "csource=0x14"), + AMD_IOMMU_EVENT_DESC(vapic_int_non_guest, "csource=0x15"), + AMD_IOMMU_EVENT_DESC(vapic_int_guest, "csource=0x16"), + AMD_IOMMU_EVENT_DESC(smi_recv, "csource=0x17"), + AMD_IOMMU_EVENT_DESC(smi_blk, "csource=0x18"), { /* end: all zeroes */ }, }; diff --git a/arch/x86/events/amd/power.c b/arch/x86/events/amd/power.c new file mode 100644 index 0000000..55a3529 --- /dev/null +++ b/arch/x86/events/amd/power.c @@ -0,0 +1,353 @@ +/* + * Performance events - AMD Processor Power Reporting Mechanism + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Author: Huang Rui <ray.huang@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/perf_event.h> +#include <asm/cpu_device_id.h> +#include "../perf_event.h" + +#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a +#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b +#define MSR_F15H_PTSC 0xc0010280 + +/* Event code: LSB 8 bits, passed in attr->config any other bit is reserved. */ +#define AMD_POWER_EVENT_MASK 0xFFULL + +/* + * Accumulated power status counters. + */ +#define AMD_POWER_EVENTSEL_PKG 1 + +/* + * The ratio of compute unit power accumulator sample period to the + * PTSC period. + */ +static unsigned int cpu_pwr_sample_ratio; + +/* Maximum accumulated power of a compute unit. */ +static u64 max_cu_acc_power; + +static struct pmu pmu_class; + +/* + * Accumulated power represents the sum of each compute unit's (CU) power + * consumption. On any core of each CU we read the total accumulated power from + * MSR_F15H_CU_PWR_ACCUMULATOR. cpu_mask represents CPU bit map of all cores + * which are picked to measure the power for the CUs they belong to. + */ +static cpumask_t cpu_mask; + +static void event_update(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev_pwr_acc, new_pwr_acc, prev_ptsc, new_ptsc; + u64 delta, tdelta; + + prev_pwr_acc = hwc->pwr_acc; + prev_ptsc = hwc->ptsc; + rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, new_pwr_acc); + rdmsrl(MSR_F15H_PTSC, new_ptsc); + + /* + * Calculate the CU power consumption over a time period, the unit of + * final value (delta) is micro-Watts. Then add it to the event count. + */ + if (new_pwr_acc < prev_pwr_acc) { + delta = max_cu_acc_power + new_pwr_acc; + delta -= prev_pwr_acc; + } else + delta = new_pwr_acc - prev_pwr_acc; + + delta *= cpu_pwr_sample_ratio * 1000; + tdelta = new_ptsc - prev_ptsc; + + do_div(delta, tdelta); + local64_add(delta, &event->count); +} + +static void __pmu_event_start(struct perf_event *event) +{ + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + event->hw.state = 0; + + rdmsrl(MSR_F15H_PTSC, event->hw.ptsc); + rdmsrl(MSR_F15H_CU_PWR_ACCUMULATOR, event->hw.pwr_acc); +} + +static void pmu_event_start(struct perf_event *event, int mode) +{ + __pmu_event_start(event); +} + +static void pmu_event_stop(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + + /* Mark event as deactivated and stopped. */ + if (!(hwc->state & PERF_HES_STOPPED)) + hwc->state |= PERF_HES_STOPPED; + + /* Check if software counter update is necessary. */ + if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of an event + * that we are disabling: + */ + event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int pmu_event_add(struct perf_event *event, int mode) +{ + struct hw_perf_event *hwc = &event->hw; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (mode & PERF_EF_START) + __pmu_event_start(event); + + return 0; +} + +static void pmu_event_del(struct perf_event *event, int flags) +{ + pmu_event_stop(event, PERF_EF_UPDATE); +} + +static int pmu_event_init(struct perf_event *event) +{ + u64 cfg = event->attr.config & AMD_POWER_EVENT_MASK; + + /* Only look at AMD power events. */ + if (event->attr.type != pmu_class.type) + return -ENOENT; + + /* Unsupported modes and filters. */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest || + /* no sampling */ + event->attr.sample_period) + return -EINVAL; + + if (cfg != AMD_POWER_EVENTSEL_PKG) + return -EINVAL; + + return 0; +} + +static void pmu_event_read(struct perf_event *event) +{ + event_update(event); +} + +static ssize_t +get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &cpu_mask); +} + +static DEVICE_ATTR(cpumask, S_IRUGO, get_attr_cpumask, NULL); + +static struct attribute *pmu_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group pmu_attr_group = { + .attrs = pmu_attrs, +}; + +/* + * Currently it only supports to report the power of each + * processor/package. + */ +EVENT_ATTR_STR(power-pkg, power_pkg, "event=0x01"); + +EVENT_ATTR_STR(power-pkg.unit, power_pkg_unit, "mWatts"); + +/* Convert the count from micro-Watts to milli-Watts. */ +EVENT_ATTR_STR(power-pkg.scale, power_pkg_scale, "1.000000e-3"); + +static struct attribute *events_attr[] = { + EVENT_PTR(power_pkg), + EVENT_PTR(power_pkg_unit), + EVENT_PTR(power_pkg_scale), + NULL, +}; + +static struct attribute_group pmu_events_group = { + .name = "events", + .attrs = events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group pmu_format_group = { + .name = "format", + .attrs = formats_attr, +}; + +static const struct attribute_group *attr_groups[] = { + &pmu_attr_group, + &pmu_format_group, + &pmu_events_group, + NULL, +}; + +static struct pmu pmu_class = { + .attr_groups = attr_groups, + /* system-wide only */ + .task_ctx_nr = perf_invalid_context, + .event_init = pmu_event_init, + .add = pmu_event_add, + .del = pmu_event_del, + .start = pmu_event_start, + .stop = pmu_event_stop, + .read = pmu_event_read, +}; + +static void power_cpu_exit(int cpu) +{ + int target; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_mask)) + return; + + /* + * Find a new CPU on the same compute unit, if was set in cpumask + * and still some CPUs on compute unit. Then migrate event and + * context to new CPU. + */ + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + if (target < nr_cpumask_bits) { + cpumask_set_cpu(target, &cpu_mask); + perf_pmu_migrate_context(&pmu_class, cpu, target); + } +} + +static void power_cpu_init(int cpu) +{ + int target; + + /* + * 1) If any CPU is set at cpu_mask in the same compute unit, do + * nothing. + * 2) If no CPU is set at cpu_mask in the same compute unit, + * set current STARTING CPU. + * + * Note: if there is a CPU aside of the new one already in the + * sibling mask, then it is also in cpu_mask. + */ + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + if (target >= nr_cpumask_bits) + cpumask_set_cpu(cpu, &cpu_mask); +} + +static int +power_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_FAILED: + case CPU_STARTING: + power_cpu_init(cpu); + break; + case CPU_DOWN_PREPARE: + power_cpu_exit(cpu); + break; + default: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block power_cpu_notifier_nb = { + .notifier_call = power_cpu_notifier, + .priority = CPU_PRI_PERF, +}; + +static const struct x86_cpu_id cpu_match[] = { + { .vendor = X86_VENDOR_AMD, .family = 0x15 }, + {}, +}; + +static int __init amd_power_pmu_init(void) +{ + int cpu, target, ret; + + if (!x86_match_cpu(cpu_match)) + return 0; + + if (!boot_cpu_has(X86_FEATURE_ACC_POWER)) + return -ENODEV; + + cpu_pwr_sample_ratio = cpuid_ecx(0x80000007); + + if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &max_cu_acc_power)) { + pr_err("Failed to read max compute unit power accumulator MSR\n"); + return -ENODEV; + } + + cpu_notifier_register_begin(); + + /* Choose one online core of each compute unit. */ + for_each_online_cpu(cpu) { + target = cpumask_first(topology_sibling_cpumask(cpu)); + if (!cpumask_test_cpu(target, &cpu_mask)) + cpumask_set_cpu(target, &cpu_mask); + } + + ret = perf_pmu_register(&pmu_class, "power", -1); + if (WARN_ON(ret)) { + pr_warn("AMD Power PMU registration failed\n"); + goto out; + } + + __register_cpu_notifier(&power_cpu_notifier_nb); + + pr_info("AMD Power PMU detected\n"); + +out: + cpu_notifier_register_done(); + + return ret; +} +module_init(amd_power_pmu_init); + +static void __exit amd_power_pmu_exit(void) +{ + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&power_cpu_notifier_nb); + cpu_notifier_register_done(); + + perf_pmu_unregister(&pmu_class); +} +module_exit(amd_power_pmu_exit); + +MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); +MODULE_DESCRIPTION("AMD Processor Power Reporting Mechanism"); +MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 9b6ad08..041e442 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1602,8 +1602,7 @@ __init struct attribute **merge_attr(struct attribute **a, struct attribute **b) return new; } -ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, - char *page) +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ container_of(attr, struct perf_pmu_events_attr, attr); @@ -1615,6 +1614,7 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, return x86_pmu.events_sysfs_show(page, config); } +EXPORT_SYMBOL_GPL(events_sysfs_show); EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c index 93cb412..7b5fd81 100644 --- a/arch/x86/events/intel/cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -13,8 +13,16 @@ #define MSR_IA32_QM_CTR 0x0c8e #define MSR_IA32_QM_EVTSEL 0x0c8d +#define MBM_CNTR_WIDTH 24 +/* + * Guaranteed time in ms as per SDM where MBM counters will not overflow. + */ +#define MBM_CTR_OVERFLOW_TIME 1000 + static u32 cqm_max_rmid = -1; static unsigned int cqm_l3_scale; /* supposedly cacheline size */ +static bool cqm_enabled, mbm_enabled; +unsigned int mbm_socket_max; /** * struct intel_pqr_state - State cache for the PQR MSR @@ -42,8 +50,37 @@ struct intel_pqr_state { * interrupts disabled, which is sufficient for the protection. */ static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state); +static struct hrtimer *mbm_timers; +/** + * struct sample - mbm event's (local or total) data + * @total_bytes #bytes since we began monitoring + * @prev_msr previous value of MSR + */ +struct sample { + u64 total_bytes; + u64 prev_msr; +}; /* + * samples profiled for total memory bandwidth type events + */ +static struct sample *mbm_total; +/* + * samples profiled for local memory bandwidth type events + */ +static struct sample *mbm_local; + +#define pkg_id topology_physical_package_id(smp_processor_id()) +/* + * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array. + * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of + * rmids per socket, an example is given below + * RMID1 of Socket0: vrmid = 1 + * RMID1 of Socket1: vrmid = 1 * (cqm_max_rmid + 1) + 1 + * RMID1 of Socket2: vrmid = 2 * (cqm_max_rmid + 1) + 1 + */ +#define rmid_2_index(rmid) ((pkg_id * (cqm_max_rmid + 1)) + rmid) +/* * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru. * Also protects event->hw.cqm_rmid * @@ -65,9 +102,13 @@ static cpumask_t cqm_cpumask; #define RMID_VAL_ERROR (1ULL << 63) #define RMID_VAL_UNAVAIL (1ULL << 62) -#define QOS_L3_OCCUP_EVENT_ID (1 << 0) - -#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID +/* + * Event IDs are used to program IA32_QM_EVTSEL before reading event + * counter from IA32_QM_CTR + */ +#define QOS_L3_OCCUP_EVENT_ID 0x01 +#define QOS_MBM_TOTAL_EVENT_ID 0x02 +#define QOS_MBM_LOCAL_EVENT_ID 0x03 /* * This is central to the rotation algorithm in __intel_cqm_rmid_rotate(). @@ -211,6 +252,21 @@ static void __put_rmid(u32 rmid) list_add_tail(&entry->list, &cqm_rmid_limbo_lru); } +static void cqm_cleanup(void) +{ + int i; + + if (!cqm_rmid_ptrs) + return; + + for (i = 0; i < cqm_max_rmid; i++) + kfree(cqm_rmid_ptrs[i]); + + kfree(cqm_rmid_ptrs); + cqm_rmid_ptrs = NULL; + cqm_enabled = false; +} + static int intel_cqm_setup_rmid_cache(void) { struct cqm_rmid_entry *entry; @@ -218,7 +274,7 @@ static int intel_cqm_setup_rmid_cache(void) int r = 0; nr_rmids = cqm_max_rmid + 1; - cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry *) * + cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) * nr_rmids, GFP_KERNEL); if (!cqm_rmid_ptrs) return -ENOMEM; @@ -249,11 +305,9 @@ static int intel_cqm_setup_rmid_cache(void) mutex_unlock(&cache_mutex); return 0; -fail: - while (r--) - kfree(cqm_rmid_ptrs[r]); - kfree(cqm_rmid_ptrs); +fail: + cqm_cleanup(); return -ENOMEM; } @@ -281,9 +335,13 @@ static bool __match_event(struct perf_event *a, struct perf_event *b) /* * Events that target same task are placed into the same cache group. + * Mark it as a multi event group, so that we update ->count + * for every event rather than just the group leader later. */ - if (a->hw.target == b->hw.target) + if (a->hw.target == b->hw.target) { + b->hw.is_group_event = true; return true; + } /* * Are we an inherited event? @@ -392,10 +450,26 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b) struct rmid_read { u32 rmid; + u32 evt_type; atomic64_t value; }; static void __intel_cqm_event_count(void *info); +static void init_mbm_sample(u32 rmid, u32 evt_type); +static void __intel_mbm_event_count(void *info); + +static bool is_mbm_event(int e) +{ + return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID); +} + +static void cqm_mask_call(struct rmid_read *rr) +{ + if (is_mbm_event(rr->evt_type)) + on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1); + else + on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1); +} /* * Exchange the RMID of a group of events. @@ -413,12 +487,12 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) */ if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) { struct rmid_read rr = { - .value = ATOMIC64_INIT(0), .rmid = old_rmid, + .evt_type = group->attr.config, + .value = ATOMIC64_INIT(0), }; - on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, - &rr, 1); + cqm_mask_call(&rr); local64_set(&group->count, atomic64_read(&rr.value)); } @@ -430,6 +504,22 @@ static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid) raw_spin_unlock_irq(&cache_lock); + /* + * If the allocation is for mbm, init the mbm stats. + * Need to check if each event in the group is mbm event + * because there could be multiple type of events in the same group. + */ + if (__rmid_valid(rmid)) { + event = group; + if (is_mbm_event(event->attr.config)) + init_mbm_sample(rmid, event->attr.config); + + list_for_each_entry(event, head, hw.cqm_group_entry) { + if (is_mbm_event(event->attr.config)) + init_mbm_sample(rmid, event->attr.config); + } + } + return old_rmid; } @@ -837,6 +927,72 @@ static void intel_cqm_rmid_rotate(struct work_struct *work) schedule_delayed_work(&intel_cqm_rmid_work, delay); } +static u64 update_sample(unsigned int rmid, u32 evt_type, int first) +{ + struct sample *mbm_current; + u32 vrmid = rmid_2_index(rmid); + u64 val, bytes, shift; + u32 eventid; + + if (evt_type == QOS_MBM_LOCAL_EVENT_ID) { + mbm_current = &mbm_local[vrmid]; + eventid = QOS_MBM_LOCAL_EVENT_ID; + } else { + mbm_current = &mbm_total[vrmid]; + eventid = QOS_MBM_TOTAL_EVENT_ID; + } + + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, val); + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + return mbm_current->total_bytes; + + if (first) { + mbm_current->prev_msr = val; + mbm_current->total_bytes = 0; + return mbm_current->total_bytes; + } + + /* + * The h/w guarantees that counters will not overflow + * so long as we poll them at least once per second. + */ + shift = 64 - MBM_CNTR_WIDTH; + bytes = (val << shift) - (mbm_current->prev_msr << shift); + bytes >>= shift; + + bytes *= cqm_l3_scale; + + mbm_current->total_bytes += bytes; + mbm_current->prev_msr = val; + + return mbm_current->total_bytes; +} + +static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type) +{ + return update_sample(rmid, evt_type, 0); +} + +static void __intel_mbm_event_init(void *info) +{ + struct rmid_read *rr = info; + + update_sample(rr->rmid, rr->evt_type, 1); +} + +static void init_mbm_sample(u32 rmid, u32 evt_type) +{ + struct rmid_read rr = { + .rmid = rmid, + .evt_type = evt_type, + .value = ATOMIC64_INIT(0), + }; + + /* on each socket, init sample */ + on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1); +} + /* * Find a group and setup RMID. * @@ -849,6 +1005,7 @@ static void intel_cqm_setup_event(struct perf_event *event, bool conflict = false; u32 rmid; + event->hw.is_group_event = false; list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { rmid = iter->hw.cqm_rmid; @@ -856,6 +1013,8 @@ static void intel_cqm_setup_event(struct perf_event *event, /* All tasks in a group share an RMID */ event->hw.cqm_rmid = rmid; *group = iter; + if (is_mbm_event(event->attr.config) && __rmid_valid(rmid)) + init_mbm_sample(rmid, event->attr.config); return; } @@ -872,6 +1031,9 @@ static void intel_cqm_setup_event(struct perf_event *event, else rmid = __get_rmid(); + if (is_mbm_event(event->attr.config) && __rmid_valid(rmid)) + init_mbm_sample(rmid, event->attr.config); + event->hw.cqm_rmid = rmid; } @@ -893,7 +1055,10 @@ static void intel_cqm_event_read(struct perf_event *event) if (!__rmid_valid(rmid)) goto out; - val = __rmid_read(rmid); + if (is_mbm_event(event->attr.config)) + val = rmid_read_mbm(rmid, event->attr.config); + else + val = __rmid_read(rmid); /* * Ignore this reading on error states and do not update the value. @@ -924,10 +1089,100 @@ static inline bool cqm_group_leader(struct perf_event *event) return !list_empty(&event->hw.cqm_groups_entry); } +static void __intel_mbm_event_count(void *info) +{ + struct rmid_read *rr = info; + u64 val; + + val = rmid_read_mbm(rr->rmid, rr->evt_type); + if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + return; + atomic64_add(val, &rr->value); +} + +static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer) +{ + struct perf_event *iter, *iter1; + int ret = HRTIMER_RESTART; + struct list_head *head; + unsigned long flags; + u32 grp_rmid; + + /* + * Need to cache_lock as the timer Event Select MSR reads + * can race with the mbm/cqm count() and mbm_init() reads. + */ + raw_spin_lock_irqsave(&cache_lock, flags); + + if (list_empty(&cache_groups)) { + ret = HRTIMER_NORESTART; + goto out; + } + + list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) { + grp_rmid = iter->hw.cqm_rmid; + if (!__rmid_valid(grp_rmid)) + continue; + if (is_mbm_event(iter->attr.config)) + update_sample(grp_rmid, iter->attr.config, 0); + + head = &iter->hw.cqm_group_entry; + if (list_empty(head)) + continue; + list_for_each_entry(iter1, head, hw.cqm_group_entry) { + if (!iter1->hw.is_group_event) + break; + if (is_mbm_event(iter1->attr.config)) + update_sample(iter1->hw.cqm_rmid, + iter1->attr.config, 0); + } + } + + hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME)); +out: + raw_spin_unlock_irqrestore(&cache_lock, flags); + + return ret; +} + +static void __mbm_start_timer(void *info) +{ + hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME), + HRTIMER_MODE_REL_PINNED); +} + +static void __mbm_stop_timer(void *info) +{ + hrtimer_cancel(&mbm_timers[pkg_id]); +} + +static void mbm_start_timers(void) +{ + on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1); +} + +static void mbm_stop_timers(void) +{ + on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1); +} + +static void mbm_hrtimer_init(void) +{ + struct hrtimer *hr; + int i; + + for (i = 0; i < mbm_socket_max; i++) { + hr = &mbm_timers[i]; + hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hr->function = mbm_hrtimer_handle; + } +} + static u64 intel_cqm_event_count(struct perf_event *event) { unsigned long flags; struct rmid_read rr = { + .evt_type = event->attr.config, .value = ATOMIC64_INIT(0), }; @@ -940,7 +1195,9 @@ static u64 intel_cqm_event_count(struct perf_event *event) return __perf_event_count(event); /* - * Only the group leader gets to report values. This stops us + * Only the group leader gets to report values except in case of + * multiple events in the same group, we still need to read the + * other events.This stops us * reporting duplicate values to userspace, and gives us a clear * rule for which task gets to report the values. * @@ -948,7 +1205,7 @@ static u64 intel_cqm_event_count(struct perf_event *event) * specific packages - we forfeit that ability when we create * task events. */ - if (!cqm_group_leader(event)) + if (!cqm_group_leader(event) && !event->hw.is_group_event) return 0; /* @@ -975,7 +1232,7 @@ static u64 intel_cqm_event_count(struct perf_event *event) if (!__rmid_valid(rr.rmid)) goto out; - on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1); + cqm_mask_call(&rr); raw_spin_lock_irqsave(&cache_lock, flags); if (event->hw.cqm_rmid == rr.rmid) @@ -1046,8 +1303,14 @@ static int intel_cqm_event_add(struct perf_event *event, int mode) static void intel_cqm_event_destroy(struct perf_event *event) { struct perf_event *group_other = NULL; + unsigned long flags; mutex_lock(&cache_mutex); + /* + * Hold the cache_lock as mbm timer handlers could be + * scanning the list of events. + */ + raw_spin_lock_irqsave(&cache_lock, flags); /* * If there's another event in this group... @@ -1079,6 +1342,14 @@ static void intel_cqm_event_destroy(struct perf_event *event) } } + raw_spin_unlock_irqrestore(&cache_lock, flags); + + /* + * Stop the mbm overflow timers when the last event is destroyed. + */ + if (mbm_enabled && list_empty(&cache_groups)) + mbm_stop_timers(); + mutex_unlock(&cache_mutex); } @@ -1086,11 +1357,13 @@ static int intel_cqm_event_init(struct perf_event *event) { struct perf_event *group = NULL; bool rotate = false; + unsigned long flags; if (event->attr.type != intel_cqm_pmu.type) return -ENOENT; - if (event->attr.config & ~QOS_EVENT_MASK) + if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) || + (event->attr.config > QOS_MBM_LOCAL_EVENT_ID)) return -EINVAL; /* unsupported modes and filters */ @@ -1110,9 +1383,21 @@ static int intel_cqm_event_init(struct perf_event *event) mutex_lock(&cache_mutex); + /* + * Start the mbm overflow timers when the first event is created. + */ + if (mbm_enabled && list_empty(&cache_groups)) + mbm_start_timers(); + /* Will also set rmid */ intel_cqm_setup_event(event, &group); + /* + * Hold the cache_lock as mbm timer handlers be + * scanning the list of events. + */ + raw_spin_lock_irqsave(&cache_lock, flags); + if (group) { list_add_tail(&event->hw.cqm_group_entry, &group->hw.cqm_group_entry); @@ -1131,6 +1416,7 @@ static int intel_cqm_event_init(struct perf_event *event) rotate = true; } + raw_spin_unlock_irqrestore(&cache_lock, flags); mutex_unlock(&cache_mutex); if (rotate) @@ -1145,6 +1431,16 @@ EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes"); EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL); EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1"); +EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02"); +EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1"); +EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB"); +EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6"); + +EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03"); +EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1"); +EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB"); +EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6"); + static struct attribute *intel_cqm_events_attr[] = { EVENT_PTR(intel_cqm_llc), EVENT_PTR(intel_cqm_llc_pkg), @@ -1154,9 +1450,38 @@ static struct attribute *intel_cqm_events_attr[] = { NULL, }; +static struct attribute *intel_mbm_events_attr[] = { + EVENT_PTR(intel_cqm_total_bytes), + EVENT_PTR(intel_cqm_local_bytes), + EVENT_PTR(intel_cqm_total_bytes_pkg), + EVENT_PTR(intel_cqm_local_bytes_pkg), + EVENT_PTR(intel_cqm_total_bytes_unit), + EVENT_PTR(intel_cqm_local_bytes_unit), + EVENT_PTR(intel_cqm_total_bytes_scale), + EVENT_PTR(intel_cqm_local_bytes_scale), + NULL, +}; + +static struct attribute *intel_cmt_mbm_events_attr[] = { + EVENT_PTR(intel_cqm_llc), + EVENT_PTR(intel_cqm_total_bytes), + EVENT_PTR(intel_cqm_local_bytes), + EVENT_PTR(intel_cqm_llc_pkg), + EVENT_PTR(intel_cqm_total_bytes_pkg), + EVENT_PTR(intel_cqm_local_bytes_pkg), + EVENT_PTR(intel_cqm_llc_unit), + EVENT_PTR(intel_cqm_total_bytes_unit), + EVENT_PTR(intel_cqm_local_bytes_unit), + EVENT_PTR(intel_cqm_llc_scale), + EVENT_PTR(intel_cqm_total_bytes_scale), + EVENT_PTR(intel_cqm_local_bytes_scale), + EVENT_PTR(intel_cqm_llc_snapshot), + NULL, +}; + static struct attribute_group intel_cqm_events_group = { .name = "events", - .attrs = intel_cqm_events_attr, + .attrs = NULL, }; PMU_FORMAT_ATTR(event, "config:0-7"); @@ -1303,12 +1628,70 @@ static const struct x86_cpu_id intel_cqm_match[] = { {} }; +static void mbm_cleanup(void) +{ + if (!mbm_enabled) + return; + + kfree(mbm_local); + kfree(mbm_total); + mbm_enabled = false; +} + +static const struct x86_cpu_id intel_mbm_local_match[] = { + { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL }, + {} +}; + +static const struct x86_cpu_id intel_mbm_total_match[] = { + { .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL }, + {} +}; + +static int intel_mbm_init(void) +{ + int ret = 0, array_size, maxid = cqm_max_rmid + 1; + + mbm_socket_max = topology_max_packages(); + array_size = sizeof(struct sample) * maxid * mbm_socket_max; + mbm_local = kmalloc(array_size, GFP_KERNEL); + if (!mbm_local) + return -ENOMEM; + + mbm_total = kmalloc(array_size, GFP_KERNEL); + if (!mbm_total) { + ret = -ENOMEM; + goto out; + } + + array_size = sizeof(struct hrtimer) * mbm_socket_max; + mbm_timers = kmalloc(array_size, GFP_KERNEL); + if (!mbm_timers) { + ret = -ENOMEM; + goto out; + } + mbm_hrtimer_init(); + +out: + if (ret) + mbm_cleanup(); + + return ret; +} + static int __init intel_cqm_init(void) { - char *str, scale[20]; + char *str = NULL, scale[20]; int i, cpu, ret; - if (!x86_match_cpu(intel_cqm_match)) + if (x86_match_cpu(intel_cqm_match)) + cqm_enabled = true; + + if (x86_match_cpu(intel_mbm_local_match) && + x86_match_cpu(intel_mbm_total_match)) + mbm_enabled = true; + + if (!cqm_enabled && !mbm_enabled) return -ENODEV; cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale; @@ -1365,16 +1748,41 @@ static int __init intel_cqm_init(void) cqm_pick_event_reader(i); } - __perf_cpu_notifier(intel_cqm_cpu_notifier); + if (mbm_enabled) + ret = intel_mbm_init(); + if (ret && !cqm_enabled) + goto out; + + if (cqm_enabled && mbm_enabled) + intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr; + else if (!cqm_enabled && mbm_enabled) + intel_cqm_events_group.attrs = intel_mbm_events_attr; + else if (cqm_enabled && !mbm_enabled) + intel_cqm_events_group.attrs = intel_cqm_events_attr; ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1); - if (ret) + if (ret) { pr_err("Intel CQM perf registration failed: %d\n", ret); - else + goto out; + } + + if (cqm_enabled) pr_info("Intel CQM monitoring enabled\n"); + if (mbm_enabled) + pr_info("Intel MBM enabled\n"); + /* + * Register the hot cpu notifier once we are sure cqm + * is enabled to avoid notifier leak. + */ + __perf_cpu_notifier(intel_cqm_cpu_notifier); out: cpu_notifier_register_done(); + if (ret) { + kfree(str); + cqm_cleanup(); + mbm_cleanup(); + } return ret; } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ce7211a..8584b90 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -570,11 +570,12 @@ int intel_pmu_drain_bts_buffer(void) * We will overwrite the from and to address before we output * the sample. */ + rcu_read_lock(); perf_prepare_sample(&header, &data, event, ®s); if (perf_output_begin(&handle, event, header.size * (top - base - skip))) - return 1; + goto unlock; for (at = base; at < top; at++) { /* Filter out any records that contain kernel addresses. */ @@ -593,6 +594,8 @@ int intel_pmu_drain_bts_buffer(void) /* There's new data available. */ event->hw.interrupts++; event->pending_kill = POLL_IN; +unlock: + rcu_read_unlock(); return 1; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index b834a3f..70c93f9 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -711,6 +711,7 @@ static int __init rapl_pmu_init(void) rapl_pmu_events_group.attrs = rapl_events_cln_attr; break; case 63: /* Haswell-Server */ + case 79: /* Broadwell-Server */ apply_quirk = true; rapl_cntr_mask = RAPL_IDX_SRV; rapl_pmu_events_group.attrs = rapl_events_srv_attr; @@ -718,6 +719,7 @@ static int __init rapl_pmu_init(void) case 60: /* Haswell */ case 69: /* Haswell-Celeron */ case 61: /* Broadwell */ + case 71: /* Broadwell-H */ rapl_cntr_mask = RAPL_IDX_HSW; rapl_pmu_events_group.attrs = rapl_events_hsw_attr; break; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 93f6bd9..ab2bcaa 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -46,7 +46,6 @@ (SNBEP_PMON_CTL_EV_SEL_MASK | \ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ SNBEP_PMON_CTL_EDGE_DET | \ - SNBEP_PMON_CTL_EV_SEL_EXT | \ SNBEP_PMON_CTL_INVERT | \ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ @@ -148,7 +147,6 @@ /* IVBEP PCU */ #define IVBEP_PCU_MSR_PMON_RAW_EVENT_MASK \ (SNBEP_PMON_CTL_EV_SEL_MASK | \ - SNBEP_PMON_CTL_EV_SEL_EXT | \ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ SNBEP_PMON_CTL_EDGE_DET | \ SNBEP_PCU_MSR_PMON_CTL_TRESH_MASK | \ @@ -258,7 +256,6 @@ SNBEP_PCU_MSR_PMON_CTL_OCC_SEL_MASK | \ SNBEP_PMON_CTL_EDGE_DET | \ SNBEP_CBO_PMON_CTL_TID_EN | \ - SNBEP_PMON_CTL_EV_SEL_EXT | \ SNBEP_PMON_CTL_INVERT | \ KNL_PCU_MSR_PMON_CTL_TRESH_MASK | \ SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \ @@ -472,7 +469,7 @@ static struct attribute *snbep_uncore_cbox_formats_attr[] = { }; static struct attribute *snbep_uncore_pcu_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_occ_sel.attr, &format_attr_edge.attr, &format_attr_inv.attr, @@ -1313,7 +1310,7 @@ static struct attribute *ivbep_uncore_cbox_formats_attr[] = { }; static struct attribute *ivbep_uncore_pcu_formats_attr[] = { - &format_attr_event_ext.attr, + &format_attr_event.attr, &format_attr_occ_sel.attr, &format_attr_edge.attr, &format_attr_thresh5.attr, diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3d1a843..8f9afef 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -94,7 +94,7 @@ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */ #define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */ -/* free, was #define X86_FEATURE_11AP ( 3*32+19) * "" Bad local APIC aka 11AP */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ @@ -245,6 +245,8 @@ /* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */ #define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */ +#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e51021c..6e47e3a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -309,7 +309,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - nodes_per_socket = ((ecx >> 8) & 7) + 1; node_id = ecx & 7; /* get compute unit information */ @@ -320,7 +319,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - nodes_per_socket = ((value >> 3) & 7) + 1; node_id = value & 7; } else return; @@ -522,6 +520,18 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_MWAITX)) use_mwaitx_delay(); + + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + u32 ecx; + + ecx = cpuid_ecx(0x8000001e); + nodes_per_socket = ((ecx >> 8) & 7) + 1; + } else if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) { + u64 value; + + rdmsrl(MSR_FAM10H_NODE_ID, value); + nodes_per_socket = ((value >> 3) & 7) + 1; + } } static void early_init_amd(struct cpuinfo_x86 *c) @@ -539,6 +549,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) set_sched_clock_stable(); } + /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ + if (c->x86_power & BIT(12)) + set_cpu_cap(c, X86_FEATURE_ACC_POWER); + #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSCALL32); #else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9988caf..8394b3d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -692,7 +692,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx); c->x86_capability[CPUID_F_1_EDX] = edx; - if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) { + if ((cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) || + ((cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL)) || + (cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)))) { c->x86_cache_max_rmid = ecx; c->x86_cache_occ_scale = ebx; } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 78fda2a..f291275 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -121,6 +121,7 @@ struct hw_perf_event { struct { /* intel_cqm */ int cqm_state; u32 cqm_rmid; + int is_group_event; struct list_head cqm_events_entry; struct list_head cqm_groups_entry; struct list_head cqm_group_entry; @@ -128,6 +129,10 @@ struct hw_perf_event { struct { /* itrace */ int itrace_started; }; + struct { /* amd_power */ + u64 pwr_acc; + u64 ptsc; + }; #ifdef CONFIG_HAVE_HW_BREAKPOINT struct { /* breakpoint */ /* diff --git a/kernel/events/core.c b/kernel/events/core.c index 712570d..de24fbc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -376,8 +376,11 @@ static void update_perf_cpu_limits(void) u64 tmp = perf_sample_period_ns; tmp *= sysctl_perf_cpu_time_max_percent; - do_div(tmp, 100); - ACCESS_ONCE(perf_sample_allowed_ns) = tmp; + tmp = div_u64(tmp, 100); + if (!tmp) + tmp = 1; + + WRITE_ONCE(perf_sample_allowed_ns, tmp); } static int perf_rotate_context(struct perf_cpu_context *cpuctx); @@ -409,7 +412,13 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, if (ret || !write) return ret; - update_perf_cpu_limits(); + if (sysctl_perf_cpu_time_max_percent == 100) { + printk(KERN_WARNING + "perf: Dynamic interrupt throttling disabled, can hang your system!\n"); + WRITE_ONCE(perf_sample_allowed_ns, 0); + } else { + update_perf_cpu_limits(); + } return 0; } @@ -423,62 +432,68 @@ int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, #define NR_ACCUMULATED_SAMPLES 128 static DEFINE_PER_CPU(u64, running_sample_length); +static u64 __report_avg; +static u64 __report_allowed; + static void perf_duration_warn(struct irq_work *w) { - u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); - u64 avg_local_sample_len; - u64 local_samples_len; - - local_samples_len = __this_cpu_read(running_sample_length); - avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; - printk_ratelimited(KERN_WARNING - "perf interrupt took too long (%lld > %lld), lowering " - "kernel.perf_event_max_sample_rate to %d\n", - avg_local_sample_len, allowed_ns >> 1, - sysctl_perf_event_sample_rate); + "perf: interrupt took too long (%lld > %lld), lowering " + "kernel.perf_event_max_sample_rate to %d\n", + __report_avg, __report_allowed, + sysctl_perf_event_sample_rate); } static DEFINE_IRQ_WORK(perf_duration_work, perf_duration_warn); void perf_sample_event_took(u64 sample_len_ns) { - u64 allowed_ns = ACCESS_ONCE(perf_sample_allowed_ns); - u64 avg_local_sample_len; - u64 local_samples_len; + u64 max_len = READ_ONCE(perf_sample_allowed_ns); + u64 running_len; + u64 avg_len; + u32 max; - if (allowed_ns == 0) + if (max_len == 0) return; - /* decay the counter by 1 average sample */ - local_samples_len = __this_cpu_read(running_sample_length); - local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES; - local_samples_len += sample_len_ns; - __this_cpu_write(running_sample_length, local_samples_len); + /* Decay the counter by 1 average sample. */ + running_len = __this_cpu_read(running_sample_length); + running_len -= running_len/NR_ACCUMULATED_SAMPLES; + running_len += sample_len_ns; + __this_cpu_write(running_sample_length, running_len); /* - * note: this will be biased artifically low until we have - * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us + * Note: this will be biased artifically low until we have + * seen NR_ACCUMULATED_SAMPLES. Doing it this way keeps us * from having to maintain a count. */ - avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; - - if (avg_local_sample_len <= allowed_ns) + avg_len = running_len/NR_ACCUMULATED_SAMPLES; + if (avg_len <= max_len) return; - if (max_samples_per_tick <= 1) - return; + __report_avg = avg_len; + __report_allowed = max_len; - max_samples_per_tick = DIV_ROUND_UP(max_samples_per_tick, 2); - sysctl_perf_event_sample_rate = max_samples_per_tick * HZ; - perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; + /* + * Compute a throttle threshold 25% below the current duration. + */ + avg_len += avg_len / 4; + max = (TICK_NSEC / 100) * sysctl_perf_cpu_time_max_percent; + if (avg_len < max) + max /= (u32)avg_len; + else + max = 1; - update_perf_cpu_limits(); + WRITE_ONCE(perf_sample_allowed_ns, avg_len); + WRITE_ONCE(max_samples_per_tick, max); + + sysctl_perf_event_sample_rate = max * HZ; + perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; if (!irq_work_queue(&perf_duration_work)) { - early_printk("perf interrupt took too long (%lld > %lld), lowering " + early_printk("perf: interrupt took too long (%lld > %lld), lowering " "kernel.perf_event_max_sample_rate to %d\n", - avg_local_sample_len, allowed_ns >> 1, + __report_avg, __report_allowed, sysctl_perf_event_sample_rate); } } @@ -4210,6 +4225,14 @@ static void __perf_event_period(struct perf_event *event, active = (event->state == PERF_EVENT_STATE_ACTIVE); if (active) { perf_pmu_disable(ctx->pmu); + /* + * We could be throttled; unthrottle now to avoid the tick + * trying to unthrottle while we already re-started the event. + */ + if (event->hw.interrupts == MAX_INTERRUPTS) { + event->hw.interrupts = 0; + perf_log_throttle(event, 1); + } event->pmu->stop(event, PERF_EF_UPDATE); } @@ -9426,10 +9449,29 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: + /* + * This must be done before the CPU comes alive, because the + * moment we can run tasks we can encounter (software) events. + * + * Specifically, someone can have inherited events on kthreadd + * or a pre-existing worker thread that gets re-bound. + */ perf_event_init_cpu(cpu); break; case CPU_DOWN_PREPARE: + /* + * This must be done before the CPU dies because after that an + * active event might want to IPI the CPU and that'll not work + * so great for dead CPUs. + * + * XXX smp_call_function_single() return -ENXIO without a warn + * so we could possibly deal with this. + * + * This is safe against new events arriving because + * sys_perf_event_open() serializes against hotplug using + * get_online_cpus(). + */ perf_event_exit_cpu(cpu); break; default: diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 1faad2c..c61f0cb 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -746,8 +746,10 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) rb->user_page = all_buf; rb->data_pages[0] = all_buf + PAGE_SIZE; - rb->page_order = ilog2(nr_pages); - rb->nr_pages = !!nr_pages; + if (nr_pages) { + rb->nr_pages = 1; + rb->page_order = ilog2(nr_pages); + } ring_buffer_init(rb, watermark, flags); diff --git a/tools/include/linux/stringify.h b/tools/include/linux/stringify.h new file mode 100644 index 0000000..841cec8 --- /dev/null +++ b/tools/include/linux/stringify.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_STRINGIFY_H +#define __LINUX_STRINGIFY_H + +/* Indirect stringification. Doing two levels allows the parameter to be a + * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) + * converts to "bar". + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#endif /* !__LINUX_STRINGIFY_H */ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index bbc82c6..316f308 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../../perf/config/utilities.mak # QUIET_CLEAN +include ../../scripts/utilities.mak # QUIET_CLEAN ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 1faecb8..a810370 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../../perf/config/utilities.mak # QUIET_CLEAN +include ../../scripts/utilities.mak # QUIET_CLEAN ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 190cc88..a8b6357 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5427,10 +5427,8 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s, } if (pevent->latency_format) { - trace_seq_printf(s, " %3d", record->cpu); pevent_data_lat_fmt(pevent, s, record); - } else - trace_seq_printf(s, " [%03d]", record->cpu); + } if (use_usec_format) { if (pevent->flags & PEVENT_NSEC_OUTPUT) { diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 3ba1c0b..098cfb9 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -1,5 +1,5 @@ include ../../scripts/Makefile.include -include ../config/utilities.mak +include ../../scripts/utilities.mak MAN1_TXT= \ $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 79483f4..ec723d0 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -40,10 +40,12 @@ address should be. The 'p' modifier can be specified multiple times: 0 - SAMPLE_IP can have arbitrary skid 1 - SAMPLE_IP must have constant skid 2 - SAMPLE_IP requested to have 0 skid - 3 - SAMPLE_IP must have 0 skid + 3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid + sample shadowing effects. For Intel systems precise event sampling is implemented with PEBS -which supports up to precise-level 2. +which supports up to precise-level 2, and precise level 3 for +some special cases On AMD systems it is implemented using IBS (up to precise-level 2). The precise modifier works with event types 0x76 (cpu-cycles, CPU diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 4a4fad4..000ea21 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -3,7 +3,7 @@ include ../scripts/Makefile.include # The default target of this Makefile is... all: -include config/utilities.mak +include ../scripts/utilities.mak # Define V to have a more verbose compile. # diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6c1b8a7..6138bde 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -3,9 +3,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> - -#include "../../util/header.h" -#include "../../util/util.h" +#include <linux/stringify.h> #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index a50df86..579a592 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -25,19 +25,17 @@ # endif #endif -extern int bench_numa(int argc, const char **argv, const char *prefix); -extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); -extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); -extern int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __maybe_unused); -extern int bench_mem_memset(int argc, const char **argv, const char *prefix); -extern int bench_futex_hash(int argc, const char **argv, const char *prefix); -extern int bench_futex_wake(int argc, const char **argv, const char *prefix); -extern int bench_futex_wake_parallel(int argc, const char **argv, - const char *prefix); -extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); +int bench_numa(int argc, const char **argv, const char *prefix); +int bench_sched_messaging(int argc, const char **argv, const char *prefix); +int bench_sched_pipe(int argc, const char **argv, const char *prefix); +int bench_mem_memcpy(int argc, const char **argv, const char *prefix); +int bench_mem_memset(int argc, const char **argv, const char *prefix); +int bench_futex_hash(int argc, const char **argv, const char *prefix); +int bench_futex_wake(int argc, const char **argv, const char *prefix); +int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix); +int bench_futex_requeue(int argc, const char **argv, const char *prefix); /* pi futexes */ -extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); +int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h index 57b4ed8..5aad2a9 100644 --- a/tools/perf/bench/mem-memcpy-arch.h +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMCPY_FN(fn, name, desc) \ - extern void *fn(void *, const void *, size_t); + void *fn(void *, const void *, size_t); #include "mem-memcpy-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h index 633800c..0d15786 100644 --- a/tools/perf/bench/mem-memset-arch.h +++ b/tools/perf/bench/mem-memset-arch.h @@ -2,7 +2,7 @@ #ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMSET_FN(fn, name, desc) \ - extern void *fn(void *, int, size_t); + void *fn(void *, int, size_t); #include "mem-memset-x86-64-asm-def.h" diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 5049d63..7500d95 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -293,7 +293,7 @@ static void bind_to_memnode(int node) if (node == -1) return; - BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)); + BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); nodemask = 1L << node; ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cfe3663..8141583 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -94,7 +94,7 @@ static int process_sample_event(struct perf_tool *tool, struct addr_location al; int ret = 0; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 4d72359..8053a8c 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -330,7 +330,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, struct hists *hists = evsel__hists(evsel); int ret = -1; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 49d55e2..bc1de9b 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -106,12 +106,14 @@ static void exec_woman_emacs(const char *path, const char *page) if (!check_emacsclient_version()) { /* This works only with emacsclient version >= 22. */ - struct strbuf man_page = STRBUF_INIT; + char *man_page; if (!path) path = "emacsclient"; - strbuf_addf(&man_page, "(woman \"%s\")", page); - execlp(path, "emacsclient", "-e", man_page.buf, NULL); + if (asprintf(&man_page, "(woman \"%s\")", page) > 0) { + execlp(path, "emacsclient", "-e", man_page, NULL); + free(man_page); + } warning("failed to exec '%s': %s", path, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -122,7 +124,7 @@ static void exec_man_konqueror(const char *path, const char *page) const char *display = getenv("DISPLAY"); if (display && *display) { - struct strbuf man_page = STRBUF_INIT; + char *man_page; const char *filename = "kfmclient"; char sbuf[STRERR_BUFSIZE]; @@ -141,8 +143,10 @@ static void exec_man_konqueror(const char *path, const char *page) filename = file; } else path = "kfmclient"; - strbuf_addf(&man_page, "man:%s(1)", page); - execlp(path, filename, "newTab", man_page.buf, NULL); + if (asprintf(&man_page, "man:%s(1)", page) > 0) { + execlp(path, filename, "newTab", man_page, NULL); + free(man_page); + } warning("failed to exec '%s': %s", path, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -161,11 +165,13 @@ static void exec_man_man(const char *path, const char *page) static void exec_man_cmd(const char *cmd, const char *page) { - struct strbuf shell_cmd = STRBUF_INIT; char sbuf[STRERR_BUFSIZE]; + char *shell_cmd; - strbuf_addf(&shell_cmd, "%s %s", cmd, page); - execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); + if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) { + execl("/bin/sh", "sh", "-c", shell_cmd, NULL); + free(shell_cmd); + } warning("failed to exec '%s': %s", cmd, strerror_r(errno, sbuf, sizeof(sbuf))); } @@ -299,43 +305,33 @@ static int is_perf_command(const char *s) is_in_cmdlist(&other_cmds, s); } -static const char *prepend(const char *prefix, const char *cmd) -{ - size_t pre_len = strlen(prefix); - size_t cmd_len = strlen(cmd); - char *p = malloc(pre_len + cmd_len + 1); - memcpy(p, prefix, pre_len); - strcpy(p + pre_len, cmd); - return p; -} - static const char *cmd_to_page(const char *perf_cmd) { + char *s; + if (!perf_cmd) return "perf"; else if (!prefixcmp(perf_cmd, "perf")) return perf_cmd; - else - return prepend("perf-", perf_cmd); + + return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s; } static void setup_man_path(void) { - struct strbuf new_path = STRBUF_INIT; + char *new_path; const char *old_path = getenv("MANPATH"); /* We should always put ':' after our path. If there is no * old_path, the ':' at the end will let 'man' to try * system-wide paths after ours to find the manual page. If * there is old_path, we need ':' as delimiter. */ - strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); - strbuf_addch(&new_path, ':'); - if (old_path) - strbuf_addstr(&new_path, old_path); - - setenv("MANPATH", new_path.buf, 1); - - strbuf_release(&new_path); + if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path ?: "") > 0) { + setenv("MANPATH", new_path, 1); + free(new_path); + } else { + error("Unable to setup man path"); + } } static void exec_viewer(const char *name, const char *page) @@ -380,7 +376,7 @@ static int show_info_page(const char *perf_cmd) return -1; } -static int get_html_page_path(struct strbuf *page_path, const char *page) +static int get_html_page_path(char **page_path, const char *page) { struct stat st; const char *html_path = system_path(PERF_HTML_PATH); @@ -392,10 +388,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page) return -1; } - strbuf_init(page_path, 0); - strbuf_addf(page_path, "%s/%s.html", html_path, page); - - return 0; + return asprintf(page_path, "%s/%s.html", html_path, page); } /* @@ -413,12 +406,12 @@ static void open_html(const char *path) static int show_html_page(const char *perf_cmd) { const char *page = cmd_to_page(perf_cmd); - struct strbuf page_path; /* it leaks but we exec bellow */ + char *page_path; /* it leaks but we exec bellow */ - if (get_html_page_path(&page_path, page) != 0) + if (get_html_page_path(&page_path, page) < 0) return -1; - open_html(page_path.buf); + open_html(page_path); return 0; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 7fa6866..d1a2d10 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -131,8 +131,7 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size) static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, union perf_event *event, - struct perf_session *session - __maybe_unused) + struct perf_session *session) { struct perf_inject *inject = container_of(tool, struct perf_inject, tool); @@ -417,9 +416,6 @@ static int perf_event__inject_buildid(struct perf_tool *tool, { struct addr_location al; struct thread *thread; - u8 cpumode; - - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; thread = machine__findnew_thread(machine, sample->pid, sample->tid); if (thread == NULL) { @@ -428,7 +424,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, goto repipe; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) { if (!al.map->dso->hit) { diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 88aeac9..85db3be 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -131,7 +131,7 @@ dump_raw_samples(struct perf_tool *tool, struct addr_location al; const char *fmt; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7eea49f..160ea23 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -41,6 +41,7 @@ #include <dlfcn.h> #include <linux/bitmap.h> +#include <linux/stringify.h> struct report { struct perf_tool tool; @@ -154,7 +155,7 @@ static int process_sample_event(struct perf_tool *tool, }; int ret = 0; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", event->header.type); return -1; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 57f9a7e..3770c3d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -405,9 +405,7 @@ out: return 0; } -static void print_sample_iregs(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, +static void print_sample_iregs(struct perf_sample *sample, struct perf_event_attr *attr) { struct regs_dump *regs = &sample->intr_regs; @@ -476,10 +474,7 @@ mispred_str(struct branch_entry *br) return br->flags.predicted ? 'P' : 'M'; } -static void print_sample_brstack(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, - struct perf_event_attr *attr __maybe_unused) +static void print_sample_brstack(struct perf_sample *sample) { struct branch_stack *br = sample->branch_stack; u64 i; @@ -498,14 +493,11 @@ static void print_sample_brstack(union perf_event *event __maybe_unused, } } -static void print_sample_brstacksym(union perf_event *event __maybe_unused, - struct perf_sample *sample, - struct thread *thread __maybe_unused, - struct perf_event_attr *attr __maybe_unused) +static void print_sample_brstacksym(struct perf_sample *sample, + struct thread *thread) { struct branch_stack *br = sample->branch_stack; struct addr_location alf, alt; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; u64 i, from, to; if (!(br && br->nr)) @@ -518,11 +510,11 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused, from = br->entries[i].from; to = br->entries[i].to; - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); if (alf.map) alf.sym = map__find_symbol(alf.map, alf.addr, NULL); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); if (alt.map) alt.sym = map__find_symbol(alt.map, alt.addr, NULL); @@ -538,8 +530,7 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused, } -static void print_sample_addr(union perf_event *event, - struct perf_sample *sample, +static void print_sample_addr(struct perf_sample *sample, struct thread *thread, struct perf_event_attr *attr) { @@ -550,7 +541,7 @@ static void print_sample_addr(union perf_event *event, if (!sample_addr_correlates_sym(attr)) return; - perf_event__preprocess_sample_addr(event, sample, thread, &al); + thread__resolve(thread, &al, sample); if (PRINT_FIELD(SYM)) { printf(" "); @@ -567,8 +558,7 @@ static void print_sample_addr(union perf_event *event, } } -static void print_sample_bts(union perf_event *event, - struct perf_sample *sample, +static void print_sample_bts(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, struct addr_location *al) @@ -598,7 +588,7 @@ static void print_sample_bts(union perf_event *event, ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && !output[attr->type].user_set)) { printf(" => "); - print_sample_addr(event, sample, thread, attr); + print_sample_addr(sample, thread, attr); } if (print_srcline_last) @@ -747,7 +737,7 @@ static size_t data_src__printf(u64 data_src) return printf("%-*s", maxlen, out); } -static void process_event(struct perf_script *script, union perf_event *event, +static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { @@ -776,7 +766,7 @@ static void process_event(struct perf_script *script, union perf_event *event, print_sample_flags(sample->flags); if (is_bts_event(attr)) { - print_sample_bts(event, sample, evsel, thread, al); + print_sample_bts(sample, evsel, thread, al); return; } @@ -784,7 +774,7 @@ static void process_event(struct perf_script *script, union perf_event *event, event_format__print(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) - print_sample_addr(event, sample, thread, attr); + print_sample_addr(sample, thread, attr); if (PRINT_FIELD(DATA_SRC)) data_src__printf(sample->data_src); @@ -804,12 +794,12 @@ static void process_event(struct perf_script *script, union perf_event *event, } if (PRINT_FIELD(IREGS)) - print_sample_iregs(event, sample, thread, attr); + print_sample_iregs(sample, attr); if (PRINT_FIELD(BRSTACK)) - print_sample_brstack(event, sample, thread, attr); + print_sample_brstack(sample); else if (PRINT_FIELD(BRSTACKSYM)) - print_sample_brstacksym(event, sample, thread, attr); + print_sample_brstacksym(sample, thread); if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); @@ -905,7 +895,7 @@ static int process_sample_event(struct perf_tool *tool, return 0; } - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { pr_err("problem processing %d event, skipping it.\n", event->header.type); return -1; @@ -920,7 +910,7 @@ static int process_sample_event(struct perf_tool *tool, if (scripting_ops) scripting_ops->process_event(event, sample, evsel, &al); else - process_event(scr, event, sample, evsel, &al); + process_event(scr, sample, evsel, &al); out_put: addr_location__put(&al); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index bd7a775..40cc9bb 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -489,7 +489,7 @@ static const char *cat_backtrace(union perf_event *event, if (!chain) goto exit; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { + if (machine__resolve(machine, &al, sample) < 0) { fprintf(stderr, "problem processing %d event, skipping it.\n", event->header.type); goto exit; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 94af190..8332149 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -67,6 +67,7 @@ #include <sys/utsname.h> #include <sys/mman.h> +#include <linux/stringify.h> #include <linux/types.h> static volatile int done; @@ -728,7 +729,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (event->header.misc & PERF_RECORD_MISC_EXACT_IP) top->exact_samples++; - if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) + if (machine__resolve(machine, &al, sample) < 0) return; if (!top->kptr_restrict_warned && @@ -809,7 +810,6 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) struct perf_session *session = top->session; union perf_event *event; struct machine *machine; - u8 origin; int ret; while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) { @@ -822,12 +822,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) evsel = perf_evlist__id2evsel(session->evlist, sample.id); assert(evsel != NULL); - origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (event->header.type == PERF_RECORD_SAMPLE) ++top->samples; - switch (origin) { + switch (sample.cpumode) { case PERF_RECORD_MISC_USER: ++top->us_samples; if (top->hide_user_symbols) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 8dc98c5..93ac724 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2256,11 +2256,10 @@ static void print_location(FILE *f, struct perf_sample *sample, static int trace__pgfault(struct trace *trace, struct perf_evsel *evsel, - union perf_event *event, + union perf_event *event __maybe_unused, struct perf_sample *sample) { struct thread *thread; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct addr_location al; char map_type = 'd'; struct thread_trace *ttrace; @@ -2279,7 +2278,7 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) goto out; - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, + thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output); @@ -2292,11 +2291,11 @@ static int trace__pgfault(struct trace *trace, fprintf(trace->output, "] => "); - thread__find_addr_location(thread, cpumode, MAP__VARIABLE, + thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, sample->addr, &al); if (!al.map) { - thread__find_addr_location(thread, cpumode, + thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, sample->addr, &al); if (al.map) diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 3f871b5..41c24010 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -7,38 +7,38 @@ extern const char perf_usage_string[]; extern const char perf_more_info_string[]; -extern void list_common_cmds_help(void); -extern const char *help_unknown_cmd(const char *cmd); -extern void prune_packed_objects(int); -extern int read_line_with_nul(char *buf, int size, FILE *file); -extern int check_pager_config(const char *cmd); +void list_common_cmds_help(void); +const char *help_unknown_cmd(const char *cmd); +void prune_packed_objects(int); +int read_line_with_nul(char *buf, int size, FILE *file); +int check_pager_config(const char *cmd); -extern int cmd_annotate(int argc, const char **argv, const char *prefix); -extern int cmd_bench(int argc, const char **argv, const char *prefix); -extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix); -extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); -extern int cmd_config(int argc, const char **argv, const char *prefix); -extern int cmd_diff(int argc, const char **argv, const char *prefix); -extern int cmd_evlist(int argc, const char **argv, const char *prefix); -extern int cmd_help(int argc, const char **argv, const char *prefix); -extern int cmd_sched(int argc, const char **argv, const char *prefix); -extern int cmd_list(int argc, const char **argv, const char *prefix); -extern int cmd_record(int argc, const char **argv, const char *prefix); -extern int cmd_report(int argc, const char **argv, const char *prefix); -extern int cmd_stat(int argc, const char **argv, const char *prefix); -extern int cmd_timechart(int argc, const char **argv, const char *prefix); -extern int cmd_top(int argc, const char **argv, const char *prefix); -extern int cmd_script(int argc, const char **argv, const char *prefix); -extern int cmd_version(int argc, const char **argv, const char *prefix); -extern int cmd_probe(int argc, const char **argv, const char *prefix); -extern int cmd_kmem(int argc, const char **argv, const char *prefix); -extern int cmd_lock(int argc, const char **argv, const char *prefix); -extern int cmd_kvm(int argc, const char **argv, const char *prefix); -extern int cmd_test(int argc, const char **argv, const char *prefix); -extern int cmd_trace(int argc, const char **argv, const char *prefix); -extern int cmd_inject(int argc, const char **argv, const char *prefix); -extern int cmd_mem(int argc, const char **argv, const char *prefix); -extern int cmd_data(int argc, const char **argv, const char *prefix); +int cmd_annotate(int argc, const char **argv, const char *prefix); +int cmd_bench(int argc, const char **argv, const char *prefix); +int cmd_buildid_cache(int argc, const char **argv, const char *prefix); +int cmd_buildid_list(int argc, const char **argv, const char *prefix); +int cmd_config(int argc, const char **argv, const char *prefix); +int cmd_diff(int argc, const char **argv, const char *prefix); +int cmd_evlist(int argc, const char **argv, const char *prefix); +int cmd_help(int argc, const char **argv, const char *prefix); +int cmd_sched(int argc, const char **argv, const char *prefix); +int cmd_list(int argc, const char **argv, const char *prefix); +int cmd_record(int argc, const char **argv, const char *prefix); +int cmd_report(int argc, const char **argv, const char *prefix); +int cmd_stat(int argc, const char **argv, const char *prefix); +int cmd_timechart(int argc, const char **argv, const char *prefix); +int cmd_top(int argc, const char **argv, const char *prefix); +int cmd_script(int argc, const char **argv, const char *prefix); +int cmd_version(int argc, const char **argv, const char *prefix); +int cmd_probe(int argc, const char **argv, const char *prefix); +int cmd_kmem(int argc, const char **argv, const char *prefix); +int cmd_lock(int argc, const char **argv, const char *prefix); +int cmd_kvm(int argc, const char **argv, const char *prefix); +int cmd_test(int argc, const char **argv, const char *prefix); +int cmd_trace(int argc, const char **argv, const char *prefix); +int cmd_inject(int argc, const char **argv, const char *prefix); +int cmd_mem(int argc, const char **argv, const char *prefix); +int cmd_data(int argc, const char **argv, const char *prefix); -extern int find_scripts(char **scripts_array, char **scripts_path_array); +int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index eca6a91..f7d7f5a 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -109,7 +109,7 @@ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET endif -include $(src-perf)/config/utilities.mak +include $(srctree)/tools/scripts/utilities.mak ifeq ($(call get-executable,$(FLEX)),) dummy := $(error Error: $(FLEX) is missing on this system, please install it) diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index afc9ad0..abd3f0e 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -293,7 +293,6 @@ static int process_sample_event(struct machine *machine, { struct perf_sample sample; struct thread *thread; - u8 cpumode; int ret; if (perf_evlist__parse_sample(evlist, event, &sample)) { @@ -307,9 +306,7 @@ static int process_sample_event(struct machine *machine, return -1; } - cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - ret = read_object_code(sample.ip, READLEN, cpumode, thread, state); + ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state); thread__put(thread); return ret; } diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 1c5c022..8f6eb85 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -20,10 +20,10 @@ static int mmap_handler(struct perf_tool *tool __maybe_unused, union perf_event *event, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct machine *machine) { - return machine__process_mmap2_event(machine, event, NULL); + return machine__process_mmap2_event(machine, event, sample); } static int init_live_machine(struct machine *machine) diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index 071a8b5..f55f4bd 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -100,9 +100,11 @@ struct machine *setup_fake_machine(struct machines *machines) } for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { + struct perf_sample sample = { + .cpumode = PERF_RECORD_MISC_USER, + }; union perf_event fake_mmap_event = { .mmap = { - .header = { .misc = PERF_RECORD_MISC_USER, }, .pid = fake_mmap_info[i].pid, .tid = fake_mmap_info[i].pid, .start = fake_mmap_info[i].start, @@ -114,7 +116,7 @@ struct machine *setup_fake_machine(struct machines *machines) strcpy(fake_mmap_event.mmap.filename, fake_mmap_info[i].filename); - machine__process_mmap_event(machine, &fake_mmap_event, NULL); + machine__process_mmap_event(machine, &fake_mmap_event, &sample); } for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) { diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index ecf136c..ed5aa9e 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -81,11 +81,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) size_t i; for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -97,13 +92,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) else iter.ops = &hist_iter_normal; + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; sample.callchain = (struct ip_callchain *)fake_callchains[i]; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 34b945a..b825d24 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -58,11 +58,6 @@ static int add_hist_entries(struct perf_evlist *evlist, */ evlist__for_each(evlist, evsel) { for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -76,12 +71,12 @@ static int add_hist_entries(struct perf_evlist *evlist, hists->dso_filter = NULL; hists->symbol_filter_str = NULL; + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; al.socket = fake_samples[i].socket; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 64b257d..358324e 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -76,17 +76,12 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) struct hists *hists = evsel__hists(evsel); for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; - + sample.cpumode = PERF_RECORD_MISC_USER; sample.pid = fake_common_samples[k].pid; sample.tid = fake_common_samples[k].pid; sample.ip = fake_common_samples[k].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + + if (machine__resolve(machine, &al, &sample) < 0) goto out; he = __hists__add_entry(hists, &al, NULL, @@ -102,17 +97,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) } for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; - sample.pid = fake_samples[i][k].pid; sample.tid = fake_samples[i][k].pid; sample.ip = fake_samples[i][k].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; he = __hists__add_entry(hists, &al, NULL, diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index 23cce67..d3556fb 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -51,11 +51,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) size_t i; for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { - const union perf_event event = { - .header = { - .misc = PERF_RECORD_MISC_USER, - }, - }; struct hist_entry_iter iter = { .evsel = evsel, .sample = &sample, @@ -63,13 +58,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) .hide_unresolved = false, }; + sample.cpumode = PERF_RECORD_MISC_USER; sample.cpu = fake_samples[i].cpu; sample.pid = fake_samples[i].pid; sample.tid = fake_samples[i].pid; sample.ip = fake_samples[i].ip; - if (perf_event__preprocess_sample(&event, machine, &al, - &sample) < 0) + if (machine__resolve(machine, &al, &sample) < 0) goto out; if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index bd9bf7e..2aa45b6 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -55,7 +55,7 @@ static u64 he_get_acc_##_field(struct hist_entry *he) \ return he->stat_acc->_field; \ } \ \ -static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ +static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt, \ struct perf_hpp *hpp, \ struct hist_entry *he) \ { \ diff --git a/tools/perf/util/Build b/tools/perf/util/Build index eea25e2..da48fd8 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -1,4 +1,3 @@ -libperf-y += abspath.o libperf-y += alias.o libperf-y += annotate.o libperf-y += build-id.o diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c deleted file mode 100644 index 0e76aff..0000000 --- a/tools/perf/util/abspath.c +++ /dev/null @@ -1,37 +0,0 @@ -#include "cache.h" - -static const char *get_pwd_cwd(void) -{ - static char cwd[PATH_MAX + 1]; - char *pwd; - struct stat cwd_stat, pwd_stat; - if (getcwd(cwd, PATH_MAX) == NULL) - return NULL; - pwd = getenv("PWD"); - if (pwd && strcmp(pwd, cwd)) { - stat(cwd, &cwd_stat); - if (!stat(pwd, &pwd_stat) && - pwd_stat.st_dev == cwd_stat.st_dev && - pwd_stat.st_ino == cwd_stat.st_ino) { - strlcpy(cwd, pwd, PATH_MAX); - } - } - return cwd; -} - -const char *make_nonrelative_path(const char *path) -{ - static char buf[PATH_MAX + 1]; - - if (is_absolute_path(path)) { - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } else { - const char *cwd = get_pwd_cwd(); - if (!cwd) - die("Cannot determine the current working directory"); - if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } - return buf; -} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cea323d..9241f8c 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -158,7 +158,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize); int hist_entry__annotate(struct hist_entry *he, size_t privsize); -int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym); +int symbol__annotate_init(struct map *map, struct symbol *sym); int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context); diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index e5a8e2d..57ff31e 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -517,7 +517,7 @@ static inline void auxtrace__free(struct perf_session *session) static inline struct auxtrace_record * auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, - int *err __maybe_unused) + int *err) { *err = 0; return NULL; diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index f1479ee..0573c2e 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -28,7 +28,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, struct machine *machine) { struct addr_location al; - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -38,7 +37,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); if (al.map != NULL) al.map->dso->hit = 1; diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 3ca453f..1f5a93c 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -26,14 +26,14 @@ extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); -extern int perf_default_config(const char *, const char *, void *); -extern int perf_config(config_fn_t fn, void *); -extern int perf_config_int(const char *, const char *); -extern u64 perf_config_u64(const char *, const char *); -extern int perf_config_bool(const char *, const char *); -extern int config_error_nonbool(const char *); -extern const char *perf_config_dirname(const char *, const char *); -extern const char *perf_etc_perfconfig(void); +int perf_default_config(const char *, const char *, void *); +int perf_config(config_fn_t fn, void *); +int perf_config_int(const char *, const char *); +u64 perf_config_u64(const char *, const char *); +int perf_config_bool(const char *, const char *); +int config_error_nonbool(const char *); +const char *perf_config_dirname(const char *, const char *); +const char *perf_etc_perfconfig(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); @@ -64,13 +64,9 @@ static inline int is_absolute_path(const char *path) return path[0] == '/'; } -const char *make_nonrelative_path(const char *path); char *strip_path_suffix(const char *path, const char *suffix); -extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); - -extern char *perf_pathdup(const char *fmt, ...) - __attribute__((format (printf, 1, 2))); +char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 18dd222..d2a9e69 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -220,7 +220,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * bool hide_unresolved); extern const char record_callchain_help[]; -extern int parse_callchain_record(const char *arg, struct callchain_param *param); +int parse_callchain_record(const char *arg, struct callchain_param *param); int parse_callchain_record_opt(const char *arg, struct callchain_param *param); int parse_callchain_report_opt(const char *arg); int parse_callchain_top_opt(const char *arg); @@ -236,7 +236,7 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, } #ifdef HAVE_SKIP_CALLCHAIN_IDX -extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); +int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain); #else static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, struct ip_callchain *chain __maybe_unused) diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index b4b8cb4..31f8dcd 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -13,7 +13,7 @@ struct cgroup_sel { extern int nr_cgroups; /* number of explicit cgroups defined */ -extern void close_cgroup(struct cgroup_sel *cgrp); -extern int parse_cgroups(const struct option *opt, const char *str, int unset); +void close_cgroup(struct cgroup_sel *cgrp); +int parse_cgroups(const struct option *opt, const char *str, int unset); #endif /* __CGROUP_H__ */ diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 3bee677..d0d4659 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -5,7 +5,7 @@ unsigned long perf_event_open_cloexec_flag(void); #ifdef __GLIBC_PREREQ #if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) -extern int sched_getcpu(void) __THROW; +int sched_getcpu(void) __THROW; #endif #endif diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 811af89..bbf69d2 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -632,7 +632,7 @@ static bool is_flush_needed(struct ctf_stream *cs) } static int process_sample_event(struct perf_tool *tool, - union perf_event *_event __maybe_unused, + union perf_event *_event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __maybe_unused) diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 1c9689e..049438d 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -333,7 +333,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, sample_addr_correlates_sym(&evsel->attr)) { struct addr_location addr_al; - perf_event__preprocess_sample_addr(event, sample, thread, &addr_al); + thread__resolve(thread, &addr_al, sample); err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id, &es.addr_sym_db_id, &es.addr_offset); if (err) diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 45ec4d0..0953280 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -162,6 +162,7 @@ struct dso { u8 loaded; u8 rel; u8 build_id[BUILD_ID_SIZE]; + u64 text_offset; const char *short_name; const char *long_name; u16 long_name_len; @@ -301,7 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, * TODO */ int dso__data_get_fd(struct dso *dso, struct machine *machine); -void dso__data_put_fd(struct dso *dso __maybe_unused); +void dso__data_put_fd(struct dso *dso); void dso__data_close(struct dso *dso); off_t dso__data_size(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index a509aa84..577e600 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -915,7 +915,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ - strbuf_addf(buf, "(function_type)"); + strbuf_add(buf, "(function_type)", 15); return 0; } else { if (!dwarf_diename(&type)) @@ -932,7 +932,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) } ret = die_get_typename(&type, buf); if (ret == 0) - strbuf_addf(buf, "%s", tmp); + strbuf_addstr(buf, tmp); return ret; } @@ -951,7 +951,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - strbuf_addf(buf, "(unknown_type)"); + strbuf_add(buf, " (unknown_type)", 14); } strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); @@ -1013,7 +1013,7 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die, } if (!first) - strbuf_addf(buf, "]>"); + strbuf_add(buf, "]>", 2); out: free(scopes); @@ -1076,7 +1076,7 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) } if (!first) - strbuf_addf(buf, "]>"); + strbuf_add(buf, "]>", 2); return ret; } diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index c42ec36..dc0ce1a 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -25,48 +25,48 @@ #include <elfutils/version.h> /* Find the realpath of the target file */ -extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); +const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname); /* Get DW_AT_comp_dir (should be NULL with older gcc) */ -extern const char *cu_get_comp_dir(Dwarf_Die *cu_die); +const char *cu_get_comp_dir(Dwarf_Die *cu_die); /* Get a line number and file name for given address */ -extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, - const char **fname, int *lineno); +int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, + const char **fname, int *lineno); /* Walk on funcitons at given address */ -extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, - int (*callback)(Dwarf_Die *, void *), void *data); +int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, + int (*callback)(Dwarf_Die *, void *), void *data); /* Ensure that this DIE is a subprogram and definition (not declaration) */ -extern bool die_is_func_def(Dwarf_Die *dw_die); +bool die_is_func_def(Dwarf_Die *dw_die); /* Ensure that this DIE is an instance of a subprogram */ -extern bool die_is_func_instance(Dwarf_Die *dw_die); +bool die_is_func_instance(Dwarf_Die *dw_die); /* Compare diename and tname */ -extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); +bool die_compare_name(Dwarf_Die *dw_die, const char *tname); /* Matching diename with glob pattern */ -extern bool die_match_name(Dwarf_Die *dw_die, const char *glob); +bool die_match_name(Dwarf_Die *dw_die, const char *glob); /* Get callsite line number of inline-function instance */ -extern int die_get_call_lineno(Dwarf_Die *in_die); +int die_get_call_lineno(Dwarf_Die *in_die); /* Get callsite file name of inlined function instance */ -extern const char *die_get_call_file(Dwarf_Die *in_die); +const char *die_get_call_file(Dwarf_Die *in_die); /* Get type die */ -extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Get a type die, but skip qualifiers and typedef */ -extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); +Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); /* Check whether the DIE is signed or not */ -extern bool die_is_signed_type(Dwarf_Die *tp_die); +bool die_is_signed_type(Dwarf_Die *tp_die); /* Get data_member_location offset */ -extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs); +int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs); /* Return values for die_find_child() callbacks */ enum { @@ -77,29 +77,29 @@ enum { }; /* Search child DIEs */ -extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die, - int (*callback)(Dwarf_Die *, void *), - void *data, Dwarf_Die *die_mem); +Dwarf_Die *die_find_child(Dwarf_Die *rt_die, + int (*callback)(Dwarf_Die *, void *), + void *data, Dwarf_Die *die_mem); /* Search a non-inlined function including given address */ -extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Search a non-inlined function with tail call at given address */ Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, Dwarf_Die *die_mem); /* Search the top inlined function including given address */ -extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Search the deepest inlined function including given address */ -extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); /* Walk on the instances of given DIE */ -extern int die_walk_instances(Dwarf_Die *in_die, - int (*callback)(Dwarf_Die *, void *), void *data); +int die_walk_instances(Dwarf_Die *in_die, + int (*callback)(Dwarf_Die *, void *), void *data); /* Walker on lines (Note: line number will not be sorted) */ typedef int (* line_walk_callback_t) (const char *fname, int lineno, @@ -109,22 +109,20 @@ typedef int (* line_walk_callback_t) (const char *fname, int lineno, * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on * the lines inside the subprogram, otherwise the DIE must be a CU DIE. */ -extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, - void *data); +int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data); /* Find a variable called 'name' at given address */ -extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, - Dwarf_Addr addr, Dwarf_Die *die_mem); +Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name, + Dwarf_Addr addr, Dwarf_Die *die_mem); /* Find a member called 'name' */ -extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, - Dwarf_Die *die_mem); +Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, + Dwarf_Die *die_mem); /* Get the name of given variable DIE */ -extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ -extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); -extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, - struct strbuf *buf); +int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); #endif diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 7bad5c3..52cf479 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1295,12 +1295,9 @@ void thread__find_addr_location(struct thread *thread, * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() */ -int perf_event__preprocess_sample(const union perf_event *event, - struct machine *machine, - struct addr_location *al, - struct perf_sample *sample) +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -1315,11 +1312,11 @@ int perf_event__preprocess_sample(const union perf_event *event, * events, but for older perf.data files there was no such thing, so do * it now. */ - if (cpumode == PERF_RECORD_MISC_KERNEL && + if (sample->cpumode == PERF_RECORD_MISC_KERNEL && machine__kernel_map(machine) == NULL) machine__create_kernel_maps(machine); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : "<not found>"); @@ -1395,16 +1392,12 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) return false; } -void perf_event__preprocess_sample_addr(union perf_event *event, - struct perf_sample *sample, - struct thread *thread, - struct addr_location *al) +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al); + thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al); if (!al->map) - thread__find_addr_map(thread, cpumode, MAP__VARIABLE, + thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE, sample->addr, al); al->cpu = sample->cpu; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index b7ffb7e..6bb1c92 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -192,6 +192,7 @@ struct perf_sample { u64 data_src; u32 flags; u16 insn_len; + u8 cpumode; void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; @@ -597,10 +598,8 @@ int perf_event__process(struct perf_tool *tool, struct addr_location; -int perf_event__preprocess_sample(const union perf_event *event, - struct machine *machine, - struct addr_location *al, - struct perf_sample *sample); +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample); void addr_location__put(struct addr_location *al); @@ -608,10 +607,8 @@ struct thread; bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); -void perf_event__preprocess_sample_addr(union perf_event *event, - struct perf_sample *sample, - struct thread *thread, - struct addr_location *al); +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample); const char *perf_event__name(unsigned int id); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0902fe4..738ce22 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1643,6 +1643,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->weight = 0; + data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 45bf9c6..cd67e64 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -2,12 +2,10 @@ #define __GENELF_H__ /* genelf.c */ -extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym, - const void *code, int csize, - void *debug, int nr_debug_entries); +int jit_write_elf(int fd, uint64_t code_addr, const char *sym, + const void *code, int csize, void *debug, int nr_debug_entries); /* genelf_debug.c */ -extern int jit_add_debug_info(Elf *e, uint64_t code_addr, - void *debug, int nr_debug_entries); +int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); #if defined(__arm__) #define GEN_ELF_ARCH EM_ARM diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 73e38e4..90680ec 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1872,11 +1872,6 @@ static int process_cpu_topology(struct perf_file_section *section, if (ph->needs_swap) nr = bswap_32(nr); - if (nr > (u32)cpu_nr) { - pr_debug("core_id number is too big." - "You may need to upgrade the perf tool.\n"); - goto free_cpu; - } ph->env.cpu[i].core_id = nr; ret = readn(fd, &nr, sizeof(nr)); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 3d87ca8..d306ca1 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -121,7 +121,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, perf_event__handler_t process); int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); -int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, +int perf_event__process_event_update(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 290b3cb..31c4641 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -670,7 +670,7 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al } static int -iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, +iter_add_single_branch_entry(struct hist_entry_iter *iter, struct addr_location *al __maybe_unused) { /* to avoid calling callback function */ diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ead18c8..bec0cd6 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -433,8 +433,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode); struct option; -int parse_filter_percentage(const struct option *opt __maybe_unused, - const char *arg, int unset __maybe_unused); +int parse_filter_percentage(const struct option *opt, const char *arg, int unset); int perf_hist_config(const char *var, const char *value); void perf_hpp_list__init(struct perf_hpp_list *list); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index eb0e7f8..6bc3ecd 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -678,7 +678,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session, return 0; } -static int intel_bts_flush(struct perf_session *session __maybe_unused, +static int intel_bts_flush(struct perf_session *session, struct perf_tool *tool __maybe_unused) { struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h index a1e99da..3f42ee4 100644 --- a/tools/perf/util/jit.h +++ b/tools/perf/util/jit.h @@ -3,13 +3,9 @@ #include <data.h> -extern int jit_process(struct perf_session *session, - struct perf_data_file *output, - struct machine *machine, - char *filename, - pid_t pid, - u64 *nbytes); - -extern int jit_inject_record(const char *filename); +int jit_process(struct perf_session *session, struct perf_data_file *output, + struct machine *machine, char *filename, pid_t pid, u64 *nbytes); + +int jit_inject_record(const char *filename); #endif /* __JIT_H__ */ diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 00724d4..33071d6 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -3,11 +3,11 @@ * Copyright (C) 2015, Huawei Inc. */ +#include <limits.h> #include <stdio.h> -#include "util.h" +#include <stdlib.h> #include "debug.h" #include "llvm-utils.h" -#include "cache.h" #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ @@ -98,11 +98,12 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) void *buf = NULL; FILE *file = NULL; size_t read_sz = 0, buf_sz = 0; + char serr[STRERR_BUFSIZE]; file = popen(cmd, "r"); if (!file) { pr_err("ERROR: unable to popen cmd: %s\n", - strerror(errno)); + strerror_r(errno, serr, sizeof(serr))); return -EINVAL; } @@ -136,7 +137,7 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) if (ferror(file)) { pr_err("ERROR: error occurred when reading from pipe: %s\n", - strerror(errno)); + strerror_r(errno, serr, sizeof(serr))); err = -EIO; goto errout; } @@ -334,10 +335,18 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, unsigned int kernel_version; char linux_version_code_str[64]; const char *clang_opt = llvm_param.clang_opt; - char clang_path[PATH_MAX], nr_cpus_avail_str[64]; + char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; + char serr[STRERR_BUFSIZE]; char *kbuild_dir = NULL, *kbuild_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; + if (path[0] != '-' && realpath(path, abspath) == NULL) { + err = errno; + pr_err("ERROR: problems with path %s: %s\n", + path, strerror_r(err, serr, sizeof(serr))); + return -err; + } + if (!template) template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; @@ -362,7 +371,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, if (nr_cpus_avail <= 0) { pr_err( "WARNING:\tunable to get available CPUs in this system: %s\n" -" \tUse 128 instead.\n", strerror(errno)); +" \tUse 128 instead.\n", strerror_r(errno, serr, sizeof(serr))); nr_cpus_avail = 128; } snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", @@ -387,8 +396,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, * stdin to be source file (testing). */ force_set_env("CLANG_SOURCE", - (path[0] == '-') ? path : - make_nonrelative_path(path)); + (path[0] == '-') ? path : abspath); pr_debug("llvm compiling command template: %s\n", template); err = read_from_pipe(template, &obj_buf, &obj_buf_sz); diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h index 5b3cf1c..23b9a74 100644 --- a/tools/perf/util/llvm-utils.h +++ b/tools/perf/util/llvm-utils.h @@ -39,11 +39,10 @@ struct llvm_param { }; extern struct llvm_param llvm_param; -extern int perf_llvm_config(const char *var, const char *value); +int perf_llvm_config(const char *var, const char *value); -extern int llvm__compile_bpf(const char *path, void **p_obj_buf, - size_t *p_obj_buf_sz); +int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz); /* This function is for test__llvm() use only */ -extern int llvm__search_clang(void); +int llvm__search_clang(void); #endif diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ad79297..80b9b6a 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1301,9 +1301,8 @@ out_problem: int machine__process_mmap2_event(struct machine *machine, union perf_event *event, - struct perf_sample *sample __maybe_unused) + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; struct map *map; enum map_type type; @@ -1312,8 +1311,8 @@ int machine__process_mmap2_event(struct machine *machine, if (dump_trace) perf_event__fprintf_mmap2(event, stdout); - if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || - cpumode == PERF_RECORD_MISC_KERNEL) { + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + sample->cpumode == PERF_RECORD_MISC_KERNEL) { ret = machine__process_kernel_mmap_event(machine, event); if (ret < 0) goto out_problem; @@ -1355,9 +1354,8 @@ out_problem: } int machine__process_mmap_event(struct machine *machine, union perf_event *event, - struct perf_sample *sample __maybe_unused) + struct perf_sample *sample) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct thread *thread; struct map *map; enum map_type type; @@ -1366,8 +1364,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_mmap(event, stdout); - if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL || - cpumode == PERF_RECORD_MISC_KERNEL) { + if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + sample->cpumode == PERF_RECORD_MISC_KERNEL) { ret = machine__process_kernel_mmap_event(machine, event); if (ret < 0) goto out_problem; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 1a3e45b..8499db2 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -94,7 +94,7 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); -int machine__process_switch_event(struct machine *machine __maybe_unused, +int machine__process_switch_event(struct machine *machine, union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 67e4930..d740c3c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -22,19 +22,18 @@ struct tracepoint_path { struct tracepoint_path *next; }; -extern struct tracepoint_path *tracepoint_id_to_path(u64 config); -extern struct tracepoint_path *tracepoint_name_to_path(const char *name); -extern bool have_tracepoints(struct list_head *evlist); +struct tracepoint_path *tracepoint_id_to_path(u64 config); +struct tracepoint_path *tracepoint_name_to_path(const char *name); +bool have_tracepoints(struct list_head *evlist); const char *event_type(int type); -extern int parse_events_option(const struct option *opt, const char *str, - int unset); -extern int parse_events(struct perf_evlist *evlist, const char *str, - struct parse_events_error *error); -extern int parse_events_terms(struct list_head *terms, const char *str); -extern int parse_filter(const struct option *opt, const char *str, int unset); -extern int exclude_perf(const struct option *opt, const char *arg, int unset); +int parse_events_option(const struct option *opt, const char *str, int unset); +int parse_events(struct perf_evlist *evlist, const char *str, + struct parse_events_error *error); +int parse_events_terms(struct list_head *terms, const char *str); +int parse_filter(const struct option *opt, const char *str, int unset); +int exclude_perf(const struct option *opt, const char *arg, int unset); #define EVENTS_HELP_MAX (128*1024) @@ -183,7 +182,7 @@ void print_symbol_events(const char *event_glob, unsigned type, void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); -extern int is_valid_tracepoint(const char *event_string); +int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 3654d96..3bf6bf8 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -41,36 +41,6 @@ static char *cleanup_path(char *path) return path; } -static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) -{ - const char *perf_dir = get_perf_dir(); - size_t len; - - len = strlen(perf_dir); - if (n < len + 1) - goto bad; - memcpy(buf, perf_dir, len); - if (len && !is_dir_sep(perf_dir[len-1])) - buf[len++] = '/'; - len += vsnprintf(buf + len, n - len, fmt, args); - if (len >= n) - goto bad; - return cleanup_path(buf); -bad: - strlcpy(buf, bad_path, n); - return buf; -} - -char *perf_pathdup(const char *fmt, ...) -{ - char path[PATH_MAX]; - va_list args; - va_start(args, fmt); - (void)perf_vsnpath(path, sizeof(path), fmt, args); - va_end(args); - return xstrdup(path); -} - char *mkpath(const char *fmt, ...) { va_list args; diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 93996ec..8319fbb 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2179,7 +2179,7 @@ static int perf_probe_event__sprintf(const char *group, const char *event, strbuf_addf(result, " in %s", module); if (pev->nargs > 0) { - strbuf_addstr(result, " with"); + strbuf_add(result, " with", 5); for (i = 0; i < pev->nargs; i++) { ret = synthesize_perf_probe_arg(&pev->args[i], buf, 128); diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index ba926c3..e54e7b0 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -114,49 +114,44 @@ int init_probe_symbol_maps(bool user_only); void exit_probe_symbol_maps(void); /* Command string to events */ -extern int parse_perf_probe_command(const char *cmd, - struct perf_probe_event *pev); -extern int parse_probe_trace_command(const char *cmd, - struct probe_trace_event *tev); +int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev); +int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev); /* Events to command string */ -extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); -extern char *synthesize_probe_trace_command(struct probe_trace_event *tev); -extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, - size_t len); +char *synthesize_perf_probe_command(struct perf_probe_event *pev); +char *synthesize_probe_trace_command(struct probe_trace_event *tev); +int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len); /* Check the perf_probe_event needs debuginfo */ -extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); +bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); /* Release event contents */ -extern void clear_perf_probe_event(struct perf_probe_event *pev); -extern void clear_probe_trace_event(struct probe_trace_event *tev); +void clear_perf_probe_event(struct perf_probe_event *pev); +void clear_probe_trace_event(struct probe_trace_event *tev); /* Command string to line-range */ -extern int parse_line_range_desc(const char *cmd, struct line_range *lr); +int parse_line_range_desc(const char *cmd, struct line_range *lr); /* Release line range members */ -extern void line_range__clear(struct line_range *lr); +void line_range__clear(struct line_range *lr); /* Initialize line range */ -extern int line_range__init(struct line_range *lr); - -extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); -extern int del_perf_probe_events(struct strfilter *filter); - -extern int show_perf_probe_event(const char *group, const char *event, - struct perf_probe_event *pev, - const char *module, bool use_stdout); -extern int show_perf_probe_events(struct strfilter *filter); -extern int show_line_range(struct line_range *lr, const char *module, - bool user); -extern int show_available_vars(struct perf_probe_event *pevs, int npevs, - struct strfilter *filter); -extern int show_available_funcs(const char *module, struct strfilter *filter, - bool user); +int line_range__init(struct line_range *lr); + +int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); +void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); +int del_perf_probe_events(struct strfilter *filter); + +int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout); +int show_perf_probe_events(struct strfilter *filter); +int show_line_range(struct line_range *lr, const char *module, bool user); +int show_available_vars(struct perf_probe_event *pevs, int npevs, + struct strfilter *filter); +int show_available_funcs(const char *module, struct strfilter *filter, bool user); bool arch__prefers_symtab(void); void arch__fix_tev_from_maps(struct perf_probe_event *pev, struct probe_trace_event *tev, struct map *map); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 4ce5c5e..b3bd0fb 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1314,18 +1314,18 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) if (probe_conf.show_location_range) { if (!externs) { if (ret) - strbuf_addf(&buf, "[INV]\t"); + strbuf_add(&buf, "[INV]\t", 6); else - strbuf_addf(&buf, "[VAL]\t"); + strbuf_add(&buf, "[VAL]\t", 6); } else - strbuf_addf(&buf, "[EXT]\t"); + strbuf_add(&buf, "[EXT]\t", 6); } ret2 = die_get_varname(die_mem, &buf); if (!ret2 && probe_conf.show_location_range && !externs) { - strbuf_addf(&buf, "\t"); + strbuf_addch(&buf, '\t'); ret2 = die_get_var_range(&af->pf.sp_die, die_mem, &buf); } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 0aec770..51137fc 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -34,27 +34,25 @@ struct debuginfo { }; /* This also tries to open distro debuginfo */ -extern struct debuginfo *debuginfo__new(const char *path); -extern void debuginfo__delete(struct debuginfo *dbg); +struct debuginfo *debuginfo__new(const char *path); +void debuginfo__delete(struct debuginfo *dbg); /* Find probe_trace_events specified by perf_probe_event from debuginfo */ -extern int debuginfo__find_trace_events(struct debuginfo *dbg, - struct perf_probe_event *pev, - struct probe_trace_event **tevs); +int debuginfo__find_trace_events(struct debuginfo *dbg, + struct perf_probe_event *pev, + struct probe_trace_event **tevs); /* Find a perf_probe_point from debuginfo */ -extern int debuginfo__find_probe_point(struct debuginfo *dbg, - unsigned long addr, - struct perf_probe_point *ppt); +int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, + struct perf_probe_point *ppt); /* Find a line range */ -extern int debuginfo__find_line_range(struct debuginfo *dbg, - struct line_range *lr); +int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr); /* Find available variables */ -extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, - struct perf_probe_event *pev, - struct variable_list **vls); +int debuginfo__find_available_vars_at(struct debuginfo *dbg, + struct perf_probe_event *pev, + struct variable_list **vls); /* Find a src file from a DWARF tag path */ int get_real_path(const char *raw_path, const char *comp_dir, diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h index 172889e..3340c9c 100644 --- a/tools/perf/util/quote.h +++ b/tools/perf/util/quote.h @@ -24,6 +24,6 @@ * sq_quote() in a real application. */ -extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); +void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); #endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 60b3593..4abd85c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1107,12 +1107,11 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { - const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; struct machine *machine; if (perf_guest && - ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || - (cpumode == PERF_RECORD_MISC_GUEST_USER))) { + ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) || + (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) { u32 pid; if (event->header.type == PERF_RECORD_MMAP diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 93fa136..47966a1 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2225,7 +2225,7 @@ int hpp_dimension__add_output(unsigned col) } static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, - struct perf_evlist *evlist __maybe_unused, + struct perf_evlist *evlist, int level) { unsigned int i; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index b33ffb2..fdb7196 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -152,8 +152,7 @@ static const char *get_ratio_color(enum grc_type type, double ratio) } static void print_stalled_cycles_frontend(int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { double total, ratio = 0.0; @@ -175,8 +174,7 @@ static void print_stalled_cycles_frontend(int cpu, } static void print_stalled_cycles_backend(int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { double total, ratio = 0.0; @@ -194,7 +192,7 @@ static void print_stalled_cycles_backend(int cpu, } static void print_branch_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -213,7 +211,7 @@ static void print_branch_misses(int cpu, } static void print_l1_dcache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -232,7 +230,7 @@ static void print_l1_dcache_misses(int cpu, } static void print_l1_icache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -250,7 +248,7 @@ static void print_l1_icache_misses(int cpu, } static void print_dtlb_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -268,7 +266,7 @@ static void print_dtlb_cache_misses(int cpu, } static void print_itlb_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { @@ -286,7 +284,7 @@ static void print_itlb_cache_misses(int cpu, } static void print_ll_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, + struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out) { diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index d3d2792..8fb7329 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -51,6 +51,13 @@ void strbuf_grow(struct strbuf *sb, size_t extra) ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); } +void strbuf_addch(struct strbuf *sb, int c) +{ + strbuf_grow(sb, 1); + sb->buf[sb->len++] = c; + sb->buf[sb->len] = '\0'; +} + void strbuf_add(struct strbuf *sb, const void *data, size_t len) { strbuf_grow(sb, len); @@ -58,7 +65,7 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len) strbuf_setlen(sb, sb->len + len); } -void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) +static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) { int len; va_list ap_saved; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 7a32c83..ab9be0fb 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -51,16 +51,16 @@ struct strbuf { #define STRBUF_INIT { 0, 0, strbuf_slopbuf } /*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *buf, ssize_t hint); -extern void strbuf_release(struct strbuf *); -extern char *strbuf_detach(struct strbuf *, size_t *); +void strbuf_init(struct strbuf *buf, ssize_t hint); +void strbuf_release(struct strbuf *buf); +char *strbuf_detach(struct strbuf *buf, size_t *); /*----- strbuf size related -----*/ static inline ssize_t strbuf_avail(const struct strbuf *sb) { return sb->alloc ? sb->alloc - sb->len - 1 : 0; } -extern void strbuf_grow(struct strbuf *, size_t); +void strbuf_grow(struct strbuf *buf, size_t); static inline void strbuf_setlen(struct strbuf *sb, size_t len) { if (!sb->alloc) @@ -71,22 +71,17 @@ static inline void strbuf_setlen(struct strbuf *sb, size_t len) { } /*----- add data in your buffer -----*/ -static inline void strbuf_addch(struct strbuf *sb, int c) { - strbuf_grow(sb, 1); - sb->buf[sb->len++] = c; - sb->buf[sb->len] = '\0'; -} +void strbuf_addch(struct strbuf *sb, int c); -extern void strbuf_add(struct strbuf *, const void *, size_t); +void strbuf_add(struct strbuf *buf, const void *, size_t); static inline void strbuf_addstr(struct strbuf *sb, const char *s) { strbuf_add(sb, s, strlen(s)); } __attribute__((format(printf,2,3))) -extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); -extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap); +void strbuf_addf(struct strbuf *sb, const char *fmt, ...); /* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); +ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint); #endif /* __PERF_STRBUF_H */ diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h index 9292a52..946fdf2 100644 --- a/tools/perf/util/svghelper.h +++ b/tools/perf/util/svghelper.h @@ -3,32 +3,31 @@ #include <linux/types.h> -extern void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end); -extern void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); -extern void svg_box(int Yslot, u64 start, u64 end, const char *type); -extern void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); -extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency); - - -extern void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace); -extern void svg_cstate(int cpu, u64 start, u64 end, int type); -extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq); - - -extern void svg_time_grid(double min_thickness); -extern void svg_io_legenda(void); -extern void svg_legenda(void); -extern void svg_wakeline(u64 start, int row1, int row2, const char *backtrace); -extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace); -extern void svg_interrupt(u64 start, int row, const char *backtrace); -extern void svg_text(int Yslot, u64 start, const char *text); -extern void svg_close(void); -extern int svg_build_topology_map(char *sib_core, int sib_core_nr, - char *sib_thr, int sib_thr_nr); +void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end); +void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges); +void svg_box(int Yslot, u64 start, u64 end, const char *type); +void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace); +void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency); + + +void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace); +void svg_cstate(int cpu, u64 start, u64 end, int type); +void svg_pstate(int cpu, u64 start, u64 end, u64 freq); + + +void svg_time_grid(double min_thickness); +void svg_io_legenda(void); +void svg_legenda(void); +void svg_wakeline(u64 start, int row1, int row2, const char *backtrace); +void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace); +void svg_interrupt(u64 start, int row, const char *backtrace); +void svg_text(int Yslot, u64 start, const char *text); +void svg_close(void); +int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr); extern int svg_page_width; extern u64 svg_highlight; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index b1dd68f..bc229a7 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -793,6 +793,7 @@ int dso__load_sym(struct dso *dso, struct map *map, uint32_t idx; GElf_Ehdr ehdr; GElf_Shdr shdr; + GElf_Shdr tshdr; Elf_Data *syms, *opddata = NULL; GElf_Sym sym; Elf_Scn *sec, *sec_strndx; @@ -832,6 +833,9 @@ int dso__load_sym(struct dso *dso, struct map *map, sec = syms_ss->symtab; shdr = syms_ss->symshdr; + if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + dso->text_offset = tshdr.sh_addr - tshdr.sh_offset; + if (runtime_ss->opdsec) opddata = elf_rawdata(runtime_ss->opdsec, NULL); @@ -880,12 +884,8 @@ int dso__load_sym(struct dso *dso, struct map *map, * Handle any relocation of vdso necessary because older kernels * attempted to prelink vdso to its virtual address. */ - if (dso__is_vdso(dso)) { - GElf_Shdr tshdr; - - if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) - map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset; - } + if (dso__is_vdso(dso)) + map->reloc = map->start - dso->text_offset; dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a937053..c8b7544 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -34,8 +34,8 @@ #endif #ifdef HAVE_LIBELF_SUPPORT -extern Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, - GElf_Shdr *shp, const char *name, size_t *idx); +Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name, size_t *idx); #endif #ifndef DMGL_PARAMS diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index 6adfa18..996046a 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -41,15 +41,9 @@ static void warn_builtin(const char *warn, va_list params) /* If we are in a dlopen()ed .so write to a global variable would segfault * (ugh), so keep things static. */ static void (*usage_routine)(const char *err) NORETURN = usage_builtin; -static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; static void (*error_routine)(const char *err, va_list params) = error_builtin; static void (*warn_routine)(const char *err, va_list params) = warn_builtin; -void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) -{ - die_routine = routine; -} - void set_warning_routine(void (*routine)(const char *err, va_list params)) { warn_routine = routine; @@ -65,7 +59,7 @@ void die(const char *err, ...) va_list params; va_start(params, err); - die_routine(err, params); + die_builtin(err, params); va_end(params); } diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index d0d50ce..8298d60 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -133,25 +133,15 @@ extern char buildid_dir[]; #define PERF_GTK_DSO "libperf-gtk.so" /* General helper functions */ -extern void usage(const char *err) NORETURN; -extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); +void usage(const char *err) NORETURN; +void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); +int error(const char *err, ...) __attribute__((format (printf, 1, 2))); +void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); -#include "../../../include/linux/stringify.h" +void set_warning_routine(void (*routine)(const char *err, va_list params)); -#define DIE_IF(cnd) \ - do { if (cnd) \ - die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \ - __stringify(cnd) "\n"); \ - } while (0) - - -extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); -extern void set_warning_routine(void (*routine)(const char *err, va_list params)); - -extern int prefixcmp(const char *str, const char *prefix); -extern void set_buildid_dir(const char *dir); +int prefixcmp(const char *str, const char *prefix); +void set_buildid_dir(const char *dir); #ifdef __GLIBC_PREREQ #if __GLIBC_PREREQ(2, 1) @@ -172,8 +162,7 @@ static inline char *gitstrchrnul(const char *s, int c) /* * Wrappers: */ -extern char *xstrdup(const char *str); -extern void *xrealloc(void *ptr, size_t size) __attribute__((weak)); +void *xrealloc(void *ptr, size_t size) __attribute__((weak)); static inline void *zalloc(size_t size) diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c index 19f15b6..5f1a07c 100644 --- a/tools/perf/util/wrapper.c +++ b/tools/perf/util/wrapper.c @@ -12,18 +12,6 @@ static inline void release_pack_memory(size_t size __maybe_unused, { } -char *xstrdup(const char *str) -{ - char *ret = strdup(str); - if (!ret) { - release_pack_memory(strlen(str) + 1, -1); - ret = strdup(str); - if (!ret) - die("Out of memory, strdup failed"); - } - return ret; -} - void *xrealloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); diff --git a/tools/perf/config/utilities.mak b/tools/scripts/utilities.mak index c16ce83..c16ce83 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/scripts/utilities.mak |