From 80c0120a3cca30166c0ab8b24e44be67e97b79af Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 8 Jun 2012 11:47:51 -0300 Subject: perf tools: Fix endianity swapping for adds_features bitmask Based on Jiri's latest attempt: https://lkml.org/lkml/2012/5/16/61 Basically, adds_features should be byte swapped assuming unsigned longs are either 8-bytes (u64) or 4-bytes (u32). Fixes 32-bit ppc dumping 64-bit x86 feature data: ======== captured on: Sun May 20 19:23:23 2012 hostname : nxos-vdc-dev3 os release : 3.4.0-rc7+ perf version : 3.4.rc4.137.g978da3 arch : x86_64 nrcpus online : 16 nrcpus avail : 16 cpudesc : Intel(R) Xeon(R) CPU E5540 @ 2.53GHz cpuid : GenuineIntel,6,26,5 total memory : 24680324 kB ... Verified 64-bit x86 can still dump feature data for 32-bit ppc. Signed-off-by: David Ahern Reviewed-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4FBBB539.5010805@gmail.com Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2dd5edf..4f9b247 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1942,7 +1942,6 @@ int perf_file_header__read(struct perf_file_header *header, else return -1; } else if (ph->needs_swap) { - unsigned int i; /* * feature bitmap is declared as an array of unsigned longs -- * not good since its size can differ between the host that @@ -1958,14 +1957,17 @@ int perf_file_header__read(struct perf_file_header *header, * file), punt and fallback to the original behavior -- * clearing all feature bits and setting buildid. */ - for (i = 0; i < BITS_TO_LONGS(HEADER_FEAT_BITS); ++i) - header->adds_features[i] = bswap_64(header->adds_features[i]); + mem_bswap_64(&header->adds_features, + BITS_TO_U64(HEADER_FEAT_BITS)); if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { - for (i = 0; i < BITS_TO_LONGS(HEADER_FEAT_BITS); ++i) { - header->adds_features[i] = bswap_64(header->adds_features[i]); - header->adds_features[i] = bswap_32(header->adds_features[i]); - } + /* unswap as u64 */ + mem_bswap_64(&header->adds_features, + BITS_TO_U64(HEADER_FEAT_BITS)); + + /* unswap as u32 */ + mem_bswap_32(&header->adds_features, + BITS_TO_U32(HEADER_FEAT_BITS)); } if (!test_bit(HEADER_HOSTNAME, header->adds_features)) { diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h index f1584833..587a230 100644 --- a/tools/perf/util/include/linux/bitops.h +++ b/tools/perf/util/include/linux/bitops.h @@ -8,6 +8,8 @@ #define BITS_PER_LONG __WORDSIZE #define BITS_PER_BYTE 8 #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2600916..c3e399b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -442,6 +442,16 @@ static void perf_tool__fill_defaults(struct perf_tool *tool) tool->finished_round = process_finished_round_stub; } } + +void mem_bswap_32(void *src, int byte_size) +{ + u32 *m = src; + while (byte_size > 0) { + *m = bswap_32(*m); + byte_size -= sizeof(u32); + ++m; + } +} void mem_bswap_64(void *src, int byte_size) { diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 7a5434c..0c702e3 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -80,6 +80,7 @@ struct branch_info *machine__resolve_bstack(struct machine *self, bool perf_session__has_traces(struct perf_session *self, const char *msg); void mem_bswap_64(void *src, int byte_size); +void mem_bswap_32(void *src, int byte_size); void perf_event__attr_swap(struct perf_event_attr *attr); int perf_session__create_kernel_maps(struct perf_session *self); -- cgit v0.10.2 From fc3e4d077d5c7a7bc1ad5bc143895b4e070e5a8b Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 15 May 2012 13:11:11 +0200 Subject: perf stat: Fix default output file The following commit: commit 56f3bae70638b33477a6015fd362ccfe354fd3ee Author: Jim Cromie Date: Wed Sep 7 17:14:00 2011 -0600 perf stat: Add --log-fd option to redirect stderr elsewhere introduced a bug in the way perf stat outputs the results by default, i.e., without the --log-fd or --output option. It would default to writing to file descriptor 0, i.e., stdin. Writing to stdin is allowed and is equivalent to writing to stdout. However, there is a major difference for any script that was already capturing the output of perf stat via redirection: perf stat >/tmp/log .... or perf stat 2>/tmp/log .... They would not capture anything anymore. They would have to do: perf stat 0>/tmp/log ... This breaks compatibility with existing scripts and does not look very natural. This patch fixes the problem by looking at output_fd only when it was modified by user (> 0). It also checks that the value if positive. Passing --log-fd 0 is ignored. I would also argue that defaulting to stderr for the results is not the right thing to do, though this patch does not address this specific issue. Signed-off-by: Stephane Eranian Cc: David Ahern Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Jim Cromie Link: http://lkml.kernel.org/r/20120515111111.GA9870@quad Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2625899..07b5c77 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1179,6 +1179,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) fprintf(stderr, "cannot use both --output and --log-fd\n"); usage_with_options(stat_usage, options); } + + if (output_fd < 0) { + fprintf(stderr, "argument to --log-fd must be a > 0\n"); + usage_with_options(stat_usage, options); + } + if (!output) { struct timespec tm; mode = append_file ? "a" : "w"; @@ -1190,7 +1196,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) } clock_gettime(CLOCK_REALTIME, &tm); fprintf(output, "# started on %s\n", ctime(&tm.tv_sec)); - } else if (output_fd != 2) { + } else if (output_fd > 0) { mode = append_file ? "a" : "w"; output = fdopen(output_fd, mode); if (!output) { -- cgit v0.10.2 From cb9dd49e11f83d548c822d7022ac180b0518b25c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Jun 2012 19:03:32 -0300 Subject: perf tools: Fix synthesizing tracepoint names from the perf.data headers We need to use the per event info snapshoted at record time to synthesize the events name, so do it just after reading the perf.data headers, when we already processed the /sys events data, otherwise we'll end up using the local /sys that only by sheer luck will have the same tracepoint ID -> real event association. Example: # uname -a Linux felicio.ghostprotocols.net 3.4.0-rc5+ #1 SMP Sat May 19 15:27:11 BRT 2012 x86_64 x86_64 x86_64 GNU/Linux # perf record -e sched:sched_switch usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB perf.data (~648 samples) ] # cat /t/events/sched/sched_switch/id 279 # perf evlist -v sched:sched_switch: sample_freq=1, type: 2, config: 279, size: 80, sample_type: 1159, read_format: 7, disabled: 1, inherit: 1, mmap: 1, comm: 1, enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1 # So on the above machine the sched:sched_switch has tracepoint id 279, but on the machine were we'll analyse it it has a different id: $ cat /t/events/sched/sched_switch/id 56 $ perf evlist -i /tmp/perf.data kmem:mm_balancedirty_writeout $ cat /t/events/kmem/mm_balancedirty_writeout/id 279 With this fix: $ perf evlist -i /tmp/perf.data sched:sched_switch Reported-by: Dmitry Antipov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Mike Galbraith Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-auwks8fpuhmrdpiefs55o5oz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4f9b247..e909d43 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2093,6 +2093,35 @@ static int read_attr(int fd, struct perf_header *ph, return ret <= 0 ? -1 : 0; } +static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel) +{ + struct event_format *event = trace_find_event(evsel->attr.config); + char bf[128]; + + if (event == NULL) + return -1; + + snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name); + evsel->name = strdup(bf); + if (event->name == NULL) + return -1; + + return 0; +} + +static int perf_evlist__set_tracepoint_names(struct perf_evlist *evlist) +{ + struct perf_evsel *pos; + + list_for_each_entry(pos, &evlist->entries, node) { + if (pos->attr.type == PERF_TYPE_TRACEPOINT && + perf_evsel__set_tracepoint_name(pos)) + return -1; + } + + return 0; +} + int perf_session__read_header(struct perf_session *session, int fd) { struct perf_header *header = &session->header; @@ -2174,6 +2203,9 @@ int perf_session__read_header(struct perf_session *session, int fd) lseek(fd, header->data_offset, SEEK_SET); + if (perf_evlist__set_tracepoint_names(session->evlist)) + goto out_delete_evlist; + header->frozen = 1; return 0; out_errno: -- cgit v0.10.2 From 9c5da09d266ca9b32eb16cf940f8161d949c2fe5 Mon Sep 17 00:00:00 2001 From: Salman Qazi Date: Thu, 14 Jun 2012 15:31:09 -0700 Subject: perf: Use css_tryget() to avoid propping up css refcount An rmdir pushes css's ref count to zero. However, if the associated directory is open at the time, the dentry ref count is non-zero. If the fd for this directory is then passed into perf_event_open, it does a css_get(). This bounces the ref count back up from zero. This is a problem by itself. But what makes it turn into a crash is the fact that we end up doing an extra dput, since we perform a dput when css_put sees the ref count go down to zero. css_tryget() does not fall into that trap. So, we use that instead. Reproduction test-case for the bug: #include #include #include #include #include #include #include #include #include #define PERF_FLAG_PID_CGROUP (1U << 2) int perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags) { return syscall(__NR_perf_event_open,hw_event_uptr, pid, cpu, group_fd, flags); } /* * Directly poke at the perf_event bug, since it's proving hard to repro * depending on where in the kernel tree. what moved? */ int main(int argc, char **argv) { int fd; struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); attr.exclude_kernel = 1; attr.size = sizeof(attr); mkdir("/dev/cgroup/perf_event/blah", 0777); fd = open("/dev/cgroup/perf_event/blah", O_RDONLY); perror("open"); rmdir("/dev/cgroup/perf_event/blah"); sleep(2); perf_event_open(&attr, fd, 0, -1, PERF_FLAG_PID_CGROUP); perror("perf_event_open"); close(fd); return 0; } Signed-off-by: Salman Qazi Signed-off-by: Peter Zijlstra Acked-by: Tejun Heo Link: http://lkml.kernel.org/r/20120614223108.1025.2503.stgit@dungbeetle.mtv.corp.google.com Signed-off-by: Ingo Molnar diff --git a/kernel/events/core.c b/kernel/events/core.c index f85c015..d7d71d6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -253,9 +253,9 @@ perf_cgroup_match(struct perf_event *event) return !event->cgrp || event->cgrp == cpuctx->cgrp; } -static inline void perf_get_cgroup(struct perf_event *event) +static inline bool perf_tryget_cgroup(struct perf_event *event) { - css_get(&event->cgrp->css); + return css_tryget(&event->cgrp->css); } static inline void perf_put_cgroup(struct perf_event *event) @@ -484,7 +484,11 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, event->cgrp = cgrp; /* must be done before we fput() the file */ - perf_get_cgroup(event); + if (!perf_tryget_cgroup(event)) { + event->cgrp = NULL; + ret = -ENOENT; + goto out; + } /* * all events in a group must monitor -- cgit v0.10.2 From 93b3cca1ccd30b1ad290951a3fc7c10c73db7313 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Jun 2012 10:54:28 -0400 Subject: ftrace: Make all inline tags also include notrace Commit 5963e317b1e9d2a ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") prevented lockdep calls from the int3 breakpoint handler from reseting the stack if a function that was called was in the process of being converted for tracing and had a breakpoint on it. The idea is, before calling the lockdep code, do a load_idt() to the special IDT that kept the breakpoint stack from reseting. This worked well as a quick fix for this kernel release, until a certain config caused a lockup in the function tracer start up tests. Investigating it, I found that the load_idt that was used to prevent the int3 from changing stacks was itself being traced! Even though the config had CONFIG_OPTIMIZE_INLINING disabled, and all 'inline' tags were set to always inline, there were still cases that it did not inline! This was caused by CONFIG_PARAVIRT_GUEST, where it would add a pointer to the native_load_idt() which made that function to be traced. Commit 45959ee7aa645815a ("ftrace: Do not function trace inlined functions") only touched the 'inline' tags when CONFIG_OPMITIZE_INLINING was enabled. PARAVIRT_GUEST shows that this was not enough and we need to also mark always_inline with notrace as well. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e5834aa..6a6d7ae 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -47,9 +47,9 @@ */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -# define inline inline __attribute__((always_inline)) -# define __inline__ __inline__ __attribute__((always_inline)) -# define __inline __inline __attribute__((always_inline)) +# define inline inline __attribute__((always_inline)) notrace +# define __inline__ __inline__ __attribute__((always_inline)) notrace +# define __inline __inline __attribute__((always_inline)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ # define inline inline notrace -- cgit v0.10.2