summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-stat.txt5
-rw-r--r--tools/perf/builtin-stat.c22
2 files changed, 26 insertions, 1 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb..73c9759 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,11 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
use --per-core in addition to -a. (system-wide). The output includes the
core number and the number of online logical processors on that physical processor.
+-D msecs::
+--initial-delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
EXAMPLES
--------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 352fbd7..2e637e4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -100,6 +100,7 @@ static const char *pre_cmd = NULL;
static const char *post_cmd = NULL;
static bool sync_run = false;
static unsigned int interval = 0;
+static unsigned int initial_delay = 0;
static bool forever = false;
static struct timespec ref_time;
static struct cpu_map *aggr_map;
@@ -254,7 +255,8 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
if (!perf_target__has_task(&target) &&
perf_evsel__is_group_leader(evsel)) {
attr->disabled = 1;
- attr->enable_on_exec = 1;
+ if (!initial_delay)
+ attr->enable_on_exec = 1;
}
return perf_evsel__open_per_thread(evsel, evsel_list->threads);
@@ -416,6 +418,20 @@ static void print_interval(void)
}
}
+static void handle_initial_delay(void)
+{
+ struct perf_evsel *counter;
+
+ if (initial_delay) {
+ const int ncpus = cpu_map__nr(evsel_list->cpus),
+ nthreads = thread_map__nr(evsel_list->threads);
+
+ usleep(initial_delay * 1000);
+ list_for_each_entry(counter, &evsel_list->entries, node)
+ perf_evsel__enable(counter, ncpus, nthreads);
+ }
+}
+
static int __run_perf_stat(int argc, const char **argv)
{
char msg[512];
@@ -486,6 +502,7 @@ static int __run_perf_stat(int argc, const char **argv)
if (forks) {
perf_evlist__start_workload(evsel_list);
+ handle_initial_delay();
if (interval) {
while (!waitpid(child_pid, &status, WNOHANG)) {
@@ -497,6 +514,7 @@ static int __run_perf_stat(int argc, const char **argv)
if (WIFSIGNALED(status))
psignal(WTERMSIG(status), argv[0]);
} else {
+ handle_initial_delay();
while (!done) {
nanosleep(&ts, NULL);
if (interval)
@@ -1419,6 +1437,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
+ OPT_UINTEGER('D', "delay", &initial_delay,
+ "ms to wait before starting measurement after program start"),
OPT_END()
};
const char * const stat_usage[] = {