From acac03fa15a8684bb60489ed87b5aae5258c0838 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 12 Jan 2011 17:59:36 +0300 Subject: perf record: Add "nodelay" mode, disabled by default Sometimes there is a need to use perf in "live-log" mode. The problem is, for seldom events, actual info output is largely delayed because perf-record reads sample data in whole pages. So for such scenarious, add flag for perf-record to go in "nodelay" mode. To track e.g. what's going on in icmp_rcv while ping is running Use it with something like this: (1) $ perf probe -L icmp_rcv | grep -U8 '^ *43\>' goto error; } 38 if (!pskb_pull(skb, sizeof(*icmph))) goto error; icmph = icmp_hdr(skb); 43 ICMPMSGIN_INC_STATS_BH(net, icmph->type); /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently * discarded. */ 50 if (icmph->type > NR_ICMP_TYPES) goto error; $ perf probe icmp_rcv:43 'type=icmph->type' (2) $ cat trace-icmp.py [...] def trace_begin(): print "in trace_begin" def trace_end(): print "in trace_end" def probe__icmp_rcv(event_name, context, common_cpu, common_secs, common_nsecs, common_pid, common_comm, __probe_ip, type): print_header(event_name, common_cpu, common_secs, common_nsecs, common_pid, common_comm) print "__probe_ip=%u, type=%u\n" % \ (__probe_ip, type), [...] (3) $ perf record -a -D -e probe:icmp_rcv -o - | \ perf script -i - -s trace-icmp.py Thanks to Peter Zijlstra for pointing how to do it. Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Ingo Molnar , Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi LKML-Reference: <20110112140613.GA11698@tugrik.mns.mnsspb.ru> Signed-off-by: Kirill Smelkov Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 52462ae..e032716 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -61,6 +61,9 @@ OPTIONS -r:: --realtime=:: Collect data with this RT SCHED_FIFO priority. +-D:: +--no-delay:: + Collect data without buffering. -A:: --append:: Append to the output file to do incremental profiling. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 1210e64..df6064a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,6 +49,7 @@ static int pipe_output = 0; static const char *output_name = "perf.data"; static int group = 0; static int realtime_prio = 0; +static bool nodelay = false; static bool raw_samples = false; static bool sample_id_all_avail = true; static bool system_wide = false; @@ -307,6 +308,11 @@ static void create_counter(struct perf_evsel *evsel, int cpu) attr->sample_type |= PERF_SAMPLE_CPU; } + if (nodelay) { + attr->watermark = 0; + attr->wakeup_events = 1; + } + attr->mmap = track; attr->comm = track; attr->inherit = !no_inherit; @@ -843,6 +849,8 @@ const struct option record_options[] = { "record events on existing thread id"), OPT_INTEGER('r', "realtime", &realtime_prio, "collect data with this RT SCHED_FIFO priority"), + OPT_BOOLEAN('D', "no-delay", &nodelay, + "collect data without buffering"), OPT_BOOLEAN('R', "raw-samples", &raw_samples, "collect raw sample records from all opened counters"), OPT_BOOLEAN('a', "all-cpus", &system_wide, -- cgit v0.10.2