summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h1
-rw-r--r--tools/arch/x86/lib/memcpy_64.S6
-rw-r--r--tools/build/Build2
-rw-r--r--tools/build/Build.include6
-rw-r--r--tools/build/Makefile8
-rw-r--r--tools/build/Makefile.build26
-rw-r--r--tools/build/Makefile.feature2
-rw-r--r--tools/build/Makefile.include4
-rw-r--r--tools/build/feature/Makefile10
-rw-r--r--tools/build/feature/test-cxx.cpp15
-rw-r--r--tools/include/uapi/linux/bpf.h4
-rw-r--r--tools/lib/subcmd/pager.c16
-rw-r--r--tools/lib/subcmd/pager.h1
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c1
-rw-r--r--tools/perf/Documentation/perf-list.txt12
-rw-r--r--tools/perf/Documentation/tips.txt4
-rw-r--r--tools/perf/Makefile.perf34
-rw-r--r--tools/perf/arch/powerpc/util/header.c11
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c3
-rw-r--r--tools/perf/arch/x86/util/header.c24
-rw-r--r--tools/perf/builtin-list.c20
-rw-r--r--tools/perf/pmu-events/Build13
-rw-r--r--tools/perf/pmu-events/README147
-rw-r--r--tools/perf/pmu-events/jevents.c814
-rw-r--r--tools/perf/pmu-events/jevents.h18
-rw-r--r--tools/perf/pmu-events/jsmn.c313
-rw-r--r--tools/perf/pmu-events/jsmn.h67
-rw-r--r--tools/perf/pmu-events/json.c162
-rw-r--r--tools/perf/pmu-events/json.h38
-rw-r--r--tools/perf/pmu-events/pmu-events.h37
-rw-r--r--tools/perf/util/evlist.c12
-rw-r--r--tools/perf/util/evsel.c3
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c38
-rw-r--r--tools/perf/util/machine.c6
-rw-r--r--tools/perf/util/parse-events.c8
-rw-r--r--tools/perf/util/parse-events.h3
-rw-r--r--tools/perf/util/pmu.c176
-rw-r--r--tools/perf/util/pmu.h6
-rw-r--r--tools/perf/util/probe-event.c2
-rw-r--r--tools/perf/util/strbuf.h3
-rw-r--r--tools/perf/util/thread.c9
-rw-r--r--tools/testing/nvdimm/Kbuild1
-rw-r--r--tools/testing/nvdimm/config_check.c1
-rw-r--r--tools/testing/nvdimm/test/iomap.c151
-rw-r--r--tools/testing/nvdimm/test/nfit.c160
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h12
-rw-r--r--tools/testing/radix-tree/Makefile3
-rw-r--r--tools/testing/radix-tree/iteration_check.c180
-rw-r--r--tools/testing/radix-tree/main.c1
-rw-r--r--tools/testing/radix-tree/regression1.c2
-rw-r--r--tools/testing/radix-tree/test.h1
-rw-r--r--tools/testing/selftests/x86/Makefile3
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h219
-rw-r--r--tools/testing/selftests/x86/protection_keys.c1410
55 files changed, 4103 insertions, 127 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 92a8308..1188bc8 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -106,7 +106,6 @@
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 2ec0b0abb..49e6eba 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -181,11 +181,11 @@ ENDPROC(memcpy_orig)
#ifndef CONFIG_UML
/*
- * memcpy_mcsafe - memory copy with machine check exception handling
+ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(memcpy_mcsafe)
+ENTRY(memcpy_mcsafe_unrolled)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -273,7 +273,7 @@ ENTRY(memcpy_mcsafe)
.L_done_memcpy_trap:
xorq %rax, %rax
ret
-ENDPROC(memcpy_mcsafe)
+ENDPROC(memcpy_mcsafe_unrolled)
.section .fixup, "ax"
/* Return -EFAULT for any failure */
diff --git a/tools/build/Build b/tools/build/Build
index 63a6c34..76d1a49 100644
--- a/tools/build/Build
+++ b/tools/build/Build
@@ -1 +1,3 @@
+hostprogs := fixdep
+
fixdep-y := fixdep.o
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 4d000bc..1dcb95e 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -90,3 +90,9 @@ if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
# - per object C flags
# - BUILD_STR macro to allow '-D"$(variable)"' constructs
c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
+
+###
+## HOSTCC C flags
+
+host_c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CHOSTFLAGS) -D"BUILD_STR(s)=\#s" $(CHOSTFLAGS_$(basetarget).o) $(CHOSTFLAGS_$(obj))
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 0d5a0e3..8332959 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -14,6 +14,12 @@ endef
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
+HOSTCC ?= gcc
+HOSTLD ?= ld
+HOSTAR ?= ar
+
+export HOSTCC HOSTLD HOSTAR
+
ifeq ($(V),1)
Q =
else
@@ -36,7 +42,7 @@ $(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
$(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o
- $(QUIET_LINK)$(CC) $(LDFLAGS) -o $@ $<
+ $(QUIET_LINK)$(HOSTCC) $(LDFLAGS) -o $@ $<
FORCE:
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 27f3583..99c0ccd 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -58,6 +58,12 @@ quiet_cmd_mkdir = MKDIR $(dir $@)
quiet_cmd_cc_o_c = CC $@
cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
+quiet_cmd_host_cc_o_c = HOSTCC $@
+ cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
+
+quiet_cmd_cxx_o_c = CXX $@
+ cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
+
quiet_cmd_cpp_i_c = CPP $@
cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
@@ -70,16 +76,28 @@ quiet_cmd_gen = GEN $@
# If there's nothing to link, create empty $@ object.
quiet_cmd_ld_multi = LD $@
cmd_ld_multi = $(if $(strip $(obj-y)),\
- $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
+ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
+
+quiet_cmd_host_ld_multi = HOSTLD $@
+ cmd_host_ld_multi = $(if $(strip $(obj-y)),\
+ $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@)
+
+ifneq ($(filter $(obj),$(hostprogs)),)
+ host = host_
+endif
# Build rules
$(OUTPUT)%.o: %.c FORCE
$(call rule_mkdir)
- $(call if_changed_dep,cc_o_c)
+ $(call if_changed_dep,$(host)cc_o_c)
+
+$(OUTPUT)%.o: %.cpp FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cxx_o_c)
$(OUTPUT)%.o: %.S FORCE
$(call rule_mkdir)
- $(call if_changed_dep,cc_o_c)
+ $(call if_changed_dep,$(host)cc_o_c)
$(OUTPUT)%.i: %.c FORCE
$(call rule_mkdir)
@@ -119,7 +137,7 @@ $(sort $(subdir-obj-y)): $(subdir-y) ;
$(in-target): $(obj-y) FORCE
$(call rule_mkdir)
- $(call if_changed,ld_multi)
+ $(call if_changed,$(host)ld_multi)
__build: $(in-target)
@:
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index a120c6b..ae52e02 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -7,7 +7,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include
index be630be..ad22e4e 100644
--- a/tools/build/Makefile.include
+++ b/tools/build/Makefile.include
@@ -1,10 +1,6 @@
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
-ifdef CROSS_COMPILE
-fixdep:
-else
fixdep:
$(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep
-endif
.PHONY: fixdep
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index a0b29a3..ac9c477 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -46,11 +46,13 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-get_cpuid.bin \
- test-sdt.bin
+ test-sdt.bin \
+ test-cxx.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
CC := $(CROSS_COMPILE)gcc -MD
+CXX := $(CROSS_COMPILE)g++ -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
all: $(FILES)
@@ -58,6 +60,9 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
+__BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
+ BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
+
###############################
$(OUTPUT)test-all.bin:
@@ -217,6 +222,9 @@ $(OUTPUT)test-bpf.bin:
$(OUTPUT)test-sdt.bin:
$(BUILD)
+$(OUTPUT)test-cxx.bin:
+ $(BUILDXX) -std=gnu++11
+
-include $(OUTPUT)*.d
###############################
diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp
new file mode 100644
index 0000000..b1dee9a
--- /dev/null
+++ b/tools/build/feature/test-cxx.cpp
@@ -0,0 +1,15 @@
+#include <iostream>
+#include <memory>
+
+static void print_str(std::string s)
+{
+ std::cout << s << std::endl;
+}
+
+int main()
+{
+ std::string s("Hello World!");
+ print_str(std::move(s));
+ std::cout << "|" << s << "|" << std::endl;
+ return 0;
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index da218fe..9e5fc16 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -339,7 +339,7 @@ enum bpf_func_id {
BPF_FUNC_skb_change_type,
/**
- * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+ * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
* @skb: pointer to skb
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
@@ -348,7 +348,7 @@ enum bpf_func_id {
* == 1 skb succeeded the cgroup2 descendant test
* < 0 error
*/
- BPF_FUNC_skb_in_cgroup,
+ BPF_FUNC_skb_under_cgroup,
/**
* bpf_get_hash_recalc(skb)
diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c
index d50f3b58..6518bea 100644
--- a/tools/lib/subcmd/pager.c
+++ b/tools/lib/subcmd/pager.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <string.h>
#include <signal.h>
+#include <sys/ioctl.h>
#include "pager.h"
#include "run-command.h"
#include "sigchain.h"
@@ -14,6 +15,7 @@
*/
static int spawned_pager;
+static int pager_columns;
void pager_init(const char *pager_env)
{
@@ -58,9 +60,12 @@ static void wait_for_pager_signal(int signo)
void setup_pager(void)
{
const char *pager = getenv(subcmd_config.pager_env);
+ struct winsize sz;
if (!isatty(1))
return;
+ if (ioctl(1, TIOCGWINSZ, &sz) == 0)
+ pager_columns = sz.ws_col;
if (!pager)
pager = getenv("PAGER");
if (!(pager || access("/usr/bin/pager", X_OK)))
@@ -98,3 +103,14 @@ int pager_in_use(void)
{
return spawned_pager;
}
+
+int pager_get_columns(void)
+{
+ char *s;
+
+ s = getenv("COLUMNS");
+ if (s)
+ return atoi(s);
+
+ return (pager_columns ? pager_columns : 80) - 2;
+}
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
index 8b83714..623f554 100644
--- a/tools/lib/subcmd/pager.h
+++ b/tools/lib/subcmd/pager.h
@@ -5,5 +5,6 @@ extern void pager_init(const char *pager_env);
extern void setup_pager(void);
extern int pager_in_use(void);
+extern int pager_get_columns(void);
#endif /* __SUBCMD_PAGER_H */
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
index 3bcada3..65984f1 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -622,6 +622,7 @@ void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
/* Reset the buffer */
kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
+ data = kbuffer_read_event(kbuf, ts);
while (kbuf->curr < offset) {
data = kbuffer_next_event(kbuf, ts);
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index a126e97..41857cc 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,13 +8,23 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
-'perf list' [hw|sw|cache|tracepoint|pmu|event_glob]
+'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|event_glob]
DESCRIPTION
-----------
This command displays the symbolic event types which can be selected in the
various perf commands with the -e option.
+OPTIONS
+-------
+--no-desc::
+Don't print descriptions.
+
+-v::
+--long-desc::
+Print longer event descriptions.
+
+
[[EVENT_MODIFIERS]]
EVENT MODIFIERS
---------------
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index 5950b5a..8a6479c 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -28,3 +28,7 @@ To change sampling frequency to 100 Hz: perf record -F 100
See assembly instructions with percentage: perf annotate <symbol>
If you prefer Intel style assembly, try: perf annotate -M intel
For hierarchical output, try: perf report --hierarchy
+Order by the overhead of source file name and line number: perf report -s srcline
+System-wide collection from all CPUs: perf record -a
+Show current config key-value pairs: perf config --list
+Show user configuration overrides: perf config --user --list
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index d710db1..982d643 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -144,6 +144,10 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
LD += $(EXTRA_LDFLAGS)
+HOSTCC ?= gcc
+HOSTLD ?= ld
+HOSTAR ?= ar
+
PKG_CONFIG = $(CROSS_COMPILE)pkg-config
RM = rm -f
@@ -345,8 +349,18 @@ strip: $(PROGRAMS) $(OUTPUT)perf
PERF_IN := $(OUTPUT)perf-in.o
export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK
+export HOSTCC HOSTLD HOSTAR
include $(srctree)/tools/build/Makefile.include
+JEVENTS := $(OUTPUT)pmu-events/jevents
+JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
+
+PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
+
+export JEVENTS
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
$(PERF_IN): prepare FORCE
@(test -f ../../include/uapi/linux/perf_event.h && ( \
(diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \
@@ -443,9 +457,18 @@ $(PERF_IN): prepare FORCE
|| echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true
$(Q)$(MAKE) $(build)=perf
-$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
+$(JEVENTS_IN): FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents
+
+$(JEVENTS): $(JEVENTS_IN)
+ $(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@
+
+$(PMU_EVENTS_IN): $(JEVENTS) FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
+
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
- $(PERF_IN) $(LIBS) -o $@
+ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
$(GTK_IN): fixdep FORCE
$(Q)$(MAKE) $(build)=gtk
@@ -474,6 +497,8 @@ perf.spec $(SCRIPTS) \
ifneq ($(OUTPUT),)
%.o: $(OUTPUT)%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
+pmu-events/%.o: $(OUTPUT)pmu-events/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
util/%.o: $(OUTPUT)util/%.o
@echo " # Redirected target $@ => $(OUTPUT)$@"
bench/%.o: $(OUTPUT)bench/%.o
@@ -729,10 +754,11 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
- $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32
+ $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents
$(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \
- $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c
+ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
+ $(OUTPUT)pmu-events/pmu-events.c
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean)
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index f8ccee1..9aaa6f5 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -32,3 +32,14 @@ get_cpuid(char *buffer, size_t sz)
}
return -1;
}
+
+char *
+get_cpuid_str(void)
+{
+ char *bufp;
+
+ if (asprintf(&bufp, "%.8lx", mfspr(SPRN_PVR)) < 0)
+ bufp = NULL;
+
+ return bufp;
+}
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index ed9d5d1..1030a6e 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -82,7 +82,8 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev,
*
* In addition, we shouldn't specify an offset for kretprobes.
*/
- if (pev->point.offset || pev->point.retprobe || !map || !sym)
+ if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) ||
+ !map || !sym)
return;
lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 146d12a..a74a48d 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -19,8 +19,8 @@ cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
: "a" (op));
}
-int
-get_cpuid(char *buffer, size_t sz)
+static int
+__get_cpuid(char *buffer, size_t sz, const char *fmt)
{
unsigned int a, b, c, d, lvl;
int family = -1, model = -1, step = -1;
@@ -48,7 +48,7 @@ get_cpuid(char *buffer, size_t sz)
if (family >= 0x6)
model += ((a >> 16) & 0xf) << 4;
}
- nb = scnprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step);
+ nb = scnprintf(buffer, sz, fmt, vendor, family, model, step);
/* look for end marker to ensure the entire data fit */
if (strchr(buffer, '$')) {
@@ -57,3 +57,21 @@ get_cpuid(char *buffer, size_t sz)
}
return -1;
}
+
+int
+get_cpuid(char *buffer, size_t sz)
+{
+ return __get_cpuid(buffer, sz, "%s,%u,%u,%u$");
+}
+
+char *
+get_cpuid_str(void)
+{
+ char *buf = malloc(128);
+
+ if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
+ free(buf);
+ return NULL;
+ }
+ return buf;
+}
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 88ee419..ba9322f 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -16,16 +16,23 @@
#include "util/pmu.h"
#include <subcmd/parse-options.h>
+static bool desc_flag = true;
+
int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
{
int i;
bool raw_dump = false;
+ bool long_desc_flag = false;
struct option list_options[] = {
OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
+ OPT_BOOLEAN('d', "desc", &desc_flag,
+ "Print extra event descriptions. --no-desc to not print."),
+ OPT_BOOLEAN('v', "long-desc", &long_desc_flag,
+ "Print longer event descriptions."),
OPT_END()
};
const char * const list_usage[] = {
- "perf list [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+ "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
NULL
};
@@ -40,7 +47,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
printf("\nList of pre-defined events (to be used in -e):\n\n");
if (argc == 0) {
- print_events(NULL, raw_dump);
+ print_events(NULL, raw_dump, !desc_flag, long_desc_flag);
return 0;
}
@@ -61,14 +68,16 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
strcmp(argv[i], "hwcache") == 0)
print_hwcache_events(NULL, raw_dump);
else if (strcmp(argv[i], "pmu") == 0)
- print_pmu_events(NULL, raw_dump);
+ print_pmu_events(NULL, raw_dump, !desc_flag,
+ long_desc_flag);
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump);
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;
if (sep == NULL) {
- print_events(argv[i], raw_dump);
+ print_events(argv[i], raw_dump, !desc_flag,
+ long_desc_flag);
continue;
}
sep_idx = sep - argv[i];
@@ -90,7 +99,8 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
print_symbol_events(s, PERF_TYPE_SOFTWARE,
event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
print_hwcache_events(s, raw_dump);
- print_pmu_events(s, raw_dump);
+ print_pmu_events(s, raw_dump, !desc_flag,
+ long_desc_flag);
print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump);
free(s);
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
new file mode 100644
index 0000000..9213a12
--- /dev/null
+++ b/tools/perf/pmu-events/Build
@@ -0,0 +1,13 @@
+hostprogs := jevents
+
+jevents-y += json.o jsmn.o jevents.o
+pmu-events-y += pmu-events.o
+JDIR = pmu-events/arch/$(ARCH)
+JSON = $(shell [ -d $(JDIR) ] && \
+ find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
+#
+# Locate/process JSON files in pmu-events/arch/
+# directory and create tables in pmu-events.c.
+#
+$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
+ $(Q)$(call echo-cmd,gen)$(JEVENTS) $(ARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README
new file mode 100644
index 0000000..1408ade
--- /dev/null
+++ b/tools/perf/pmu-events/README
@@ -0,0 +1,147 @@
+
+The contents of this directory allow users to specify PMU events in their
+CPUs by their symbolic names rather than raw event codes (see example below).
+
+The main program in this directory, is the 'jevents', which is built and
+executed _BEFORE_ the perf binary itself is built.
+
+The 'jevents' program tries to locate and process JSON files in the directory
+tree tools/perf/pmu-events/arch/foo.
+
+ - Regular files with '.json' extension in the name are assumed to be
+ JSON files, each of which describes a set of PMU events.
+
+ - Regular files with basename starting with 'mapfile.csv' are assumed
+ to be a CSV file that maps a specific CPU to its set of PMU events.
+ (see below for mapfile format)
+
+ - Directories are traversed, but all other files are ignored.
+
+The PMU events supported by a CPU model are expected to grouped into topics
+such as Pipelining, Cache, Memory, Floating-point etc. All events for a topic
+should be placed in a separate JSON file - where the file name identifies
+the topic. Eg: "Floating-point.json".
+
+All the topic JSON files for a CPU model/family should be in a separate
+sub directory. Thus for the Silvermont X86 CPU:
+
+ $ ls tools/perf/pmu-events/arch/x86/Silvermont_core
+ Cache.json Memory.json Virtual-Memory.json
+ Frontend.json Pipeline.json
+
+Using the JSON files and the mapfile, 'jevents' generates the C source file,
+'pmu-events.c', which encodes the two sets of tables:
+
+ - Set of 'PMU events tables' for all known CPUs in the architecture,
+ (one table like the following, per JSON file; table name 'pme_power8'
+ is derived from JSON file name, 'power8.json').
+
+ struct pmu_event pme_power8[] = {
+
+ ...
+
+ {
+ .name = "pm_1plus_ppc_cmpl",
+ .event = "event=0x100f2",
+ .desc = "1 or more ppc insts finished,",
+ },
+
+ ...
+ }
+
+ - A 'mapping table' that maps each CPU of the architecture, to its
+ 'PMU events table'
+
+ struct pmu_events_map pmu_events_map[] = {
+ {
+ .cpuid = "004b0000",
+ .version = "1",
+ .type = "core",
+ .table = pme_power8
+ },
+ ...
+
+ };
+
+After the 'pmu-events.c' is generated, it is compiled and the resulting
+'pmu-events.o' is added to 'libperf.a' which is then used to build perf.
+
+NOTES:
+ 1. Several CPUs can support same set of events and hence use a common
+ JSON file. Hence several entries in the pmu_events_map[] could map
+ to a single 'PMU events table'.
+
+ 2. The 'pmu-events.h' has an extern declaration for the mapping table
+ and the generated 'pmu-events.c' defines this table.
+
+ 3. _All_ known CPU tables for architecture are included in the perf
+ binary.
+
+At run time, perf determines the actual CPU it is running on, finds the
+matching events table and builds aliases for those events. This allows
+users to specify events by their name:
+
+ $ perf stat -e pm_1plus_ppc_cmpl sleep 1
+
+where 'pm_1plus_ppc_cmpl' is a Power8 PMU event.
+
+In case of errors when processing files in the tools/perf/pmu-events/arch
+directory, 'jevents' tries to create an empty mapping file to allow the perf
+build to succeed even if the PMU event aliases cannot be used.
+
+However some errors in processing may cause the perf build to fail.
+
+Mapfile format
+===============
+
+The mapfile enables multiple CPU models to share a single set of PMU events.
+It is required even if such mapping is 1:1.
+
+The mapfile.csv format is expected to be:
+
+ Header line
+ CPUID,Version,Dir/path/name,Type
+
+where:
+
+ Comma:
+ is the required field delimiter (i.e other fields cannot
+ have commas within them).
+
+ Comments:
+ Lines in which the first character is either '\n' or '#'
+ are ignored.
+
+ Header line
+ The header line is the first line in the file, which is
+ always _IGNORED_. It can empty.
+
+ CPUID:
+ CPUID is an arch-specific char string, that can be used
+ to identify CPU (and associate it with a set of PMU events
+ it supports). Multiple CPUIDS can point to the same
+ File/path/name.json.
+
+ Example:
+ CPUID == 'GenuineIntel-6-2E' (on x86).
+ CPUID == '004b0100' (PVR value in Powerpc)
+ Version:
+ is the Version of the mapfile.
+
+ Dir/path/name:
+ is the pathname to the directory containing the CPU's JSON
+ files, relative to the directory containing the mapfile.csv
+
+ Type:
+ indicates whether the events or "core" or "uncore" events.
+
+
+ Eg:
+
+ $ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
+ GenuineIntel-6-37,V13,Silvermont_core,core
+ GenuineIntel-6-4D,V13,Silvermont_core,core
+ GenuineIntel-6-4C,V13,Silvermont_core,core
+
+ i.e the three CPU models use the JSON files (i.e PMU events) listed
+ in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'.
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
new file mode 100644
index 0000000..41611d7
--- /dev/null
+++ b/tools/perf/pmu-events/jevents.c
@@ -0,0 +1,814 @@
+#define _XOPEN_SOURCE 500 /* needed for nftw() */
+#define _GNU_SOURCE /* needed for asprintf() */
+
+/* Parse event JSON files */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <dirent.h>
+#include <sys/time.h> /* getrlimit */
+#include <sys/resource.h> /* getrlimit */
+#include <ftw.h>
+#include <sys/stat.h>
+#include "jsmn.h"
+#include "json.h"
+#include "jevents.h"
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((unused))
+#endif
+
+int verbose;
+char *prog;
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+
+ int ret;
+ va_list args;
+
+ if (var < level)
+ return 0;
+
+ va_start(args, fmt);
+
+ ret = vfprintf(stderr, fmt, args);
+
+ va_end(args);
+
+ return ret;
+}
+
+__attribute__((weak)) char *get_cpu_str(void)
+{
+ return NULL;
+}
+
+static void addfield(char *map, char **dst, const char *sep,
+ const char *a, jsmntok_t *bt)
+{
+ unsigned int len = strlen(a) + 1 + strlen(sep);
+ int olen = *dst ? strlen(*dst) : 0;
+ int blen = bt ? json_len(bt) : 0;
+ char *out;
+
+ out = realloc(*dst, len + olen + blen);
+ if (!out) {
+ /* Don't add field in this case */
+ return;
+ }
+ *dst = out;
+
+ if (!olen)
+ *(*dst) = 0;
+ else
+ strcat(*dst, sep);
+ strcat(*dst, a);
+ if (bt)
+ strncat(*dst, map + bt->start, blen);
+}
+
+static void fixname(char *s)
+{
+ for (; *s; s++)
+ *s = tolower(*s);
+}
+
+static void fixdesc(char *s)
+{
+ char *e = s + strlen(s);
+
+ /* Remove trailing dots that look ugly in perf list */
+ --e;
+ while (e >= s && isspace(*e))
+ --e;
+ if (*e == '.')
+ *e = 0;
+}
+
+static struct msrmap {
+ const char *num;
+ const char *pname;
+} msrmap[] = {
+ { "0x3F6", "ldlat=" },
+ { "0x1A6", "offcore_rsp=" },
+ { "0x1A7", "offcore_rsp=" },
+ { "0x3F7", "frontend=" },
+ { NULL, NULL }
+};
+
+static struct field {
+ const char *field;
+ const char *kernel;
+} fields[] = {
+ { "EventCode", "event=" },
+ { "UMask", "umask=" },
+ { "CounterMask", "cmask=" },
+ { "Invert", "inv=" },
+ { "AnyThread", "any=" },
+ { "EdgeDetect", "edge=" },
+ { "SampleAfterValue", "period=" },
+ { NULL, NULL }
+};
+
+static void cut_comma(char *map, jsmntok_t *newval)
+{
+ int i;
+
+ /* Cut off everything after comma */
+ for (i = newval->start; i < newval->end; i++) {
+ if (map[i] == ',')
+ newval->end = i;
+ }
+}
+
+static int match_field(char *map, jsmntok_t *field, int nz,
+ char **event, jsmntok_t *val)
+{
+ struct field *f;
+ jsmntok_t newval = *val;
+
+ for (f = fields; f->field; f++)
+ if (json_streq(map, field, f->field) && nz) {
+ cut_comma(map, &newval);
+ addfield(map, event, ",", f->kernel, &newval);
+ return 1;
+ }
+ return 0;
+}
+
+static struct msrmap *lookup_msr(char *map, jsmntok_t *val)
+{
+ jsmntok_t newval = *val;
+ static bool warned;
+ int i;
+
+ cut_comma(map, &newval);
+ for (i = 0; msrmap[i].num; i++)
+ if (json_streq(map, &newval, msrmap[i].num))
+ return &msrmap[i];
+ if (!warned) {
+ warned = true;
+ pr_err("%s: Unknown MSR in event file %.*s\n", prog,
+ json_len(val), map + val->start);
+ }
+ return NULL;
+}
+
+#define EXPECT(e, t, m) do { if (!(e)) { \
+ jsmntok_t *loc = (t); \
+ if (!(t)->start && (t) > tokens) \
+ loc = (t) - 1; \
+ pr_err("%s:%d: " m ", got %s\n", fn, \
+ json_line(map, loc), \
+ json_name(t)); \
+ goto out_free; \
+} } while (0)
+
+#define TOPIC_DEPTH 256
+static char *topic_array[TOPIC_DEPTH];
+static int topic_level;
+
+static char *get_topic(void)
+{
+ char *tp_old, *tp = NULL;
+ int i;
+
+ for (i = 0; i < topic_level + 1; i++) {
+ int n;
+
+ tp_old = tp;
+ n = asprintf(&tp, "%s%s", tp ?: "", topic_array[i]);
+ if (n < 0) {
+ pr_info("%s: asprintf() error %s\n", prog);
+ return NULL;
+ }
+ free(tp_old);
+ }
+
+ for (i = 0; i < (int) strlen(tp); i++) {
+ char c = tp[i];
+
+ if (c == '-')
+ tp[i] = ' ';
+ else if (c == '.') {
+ tp[i] = '\0';
+ break;
+ }
+ }
+
+ return tp;
+}
+
+static int add_topic(int level, char *bname)
+{
+ char *topic;
+
+ level -= 2;
+
+ if (level >= TOPIC_DEPTH)
+ return -EINVAL;
+
+ topic = strdup(bname);
+ if (!topic) {
+ pr_info("%s: strdup() error %s for file %s\n", prog,
+ strerror(errno), bname);
+ return -ENOMEM;
+ }
+
+ free(topic_array[topic_level]);
+ topic_array[topic_level] = topic;
+ topic_level = level;
+ return 0;
+}
+
+struct perf_entry_data {
+ FILE *outfp;
+ char *topic;
+};
+
+static int close_table;
+
+static void print_events_table_prefix(FILE *fp, const char *tblname)
+{
+ fprintf(fp, "struct pmu_event %s[] = {\n", tblname);
+ close_table = 1;
+}
+
+static int print_events_table_entry(void *data, char *name, char *event,
+ char *desc, char *long_desc)
+{
+ struct perf_entry_data *pd = data;
+ FILE *outfp = pd->outfp;
+ char *topic = pd->topic;
+
+ /*
+ * TODO: Remove formatting chars after debugging to reduce
+ * string lengths.
+ */
+ fprintf(outfp, "{\n");
+
+ fprintf(outfp, "\t.name = \"%s\",\n", name);
+ fprintf(outfp, "\t.event = \"%s\",\n", event);
+ fprintf(outfp, "\t.desc = \"%s\",\n", desc);
+ fprintf(outfp, "\t.topic = \"%s\",\n", topic);
+ if (long_desc && long_desc[0])
+ fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);
+
+ fprintf(outfp, "},\n");
+
+ return 0;
+}
+
+static void print_events_table_suffix(FILE *outfp)
+{
+ fprintf(outfp, "{\n");
+
+ fprintf(outfp, "\t.name = 0,\n");
+ fprintf(outfp, "\t.event = 0,\n");
+ fprintf(outfp, "\t.desc = 0,\n");
+
+ fprintf(outfp, "},\n");
+ fprintf(outfp, "};\n");
+ close_table = 0;
+}
+
+static struct fixed {
+ const char *name;
+ const char *event;
+} fixed[] = {
+ { "inst_retired.any", "event=0xc0" },
+ { "inst_retired.any_p", "event=0xc0" },
+ { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
+ { "cpu_clk_unhalted.thread", "event=0x3c" },
+ { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
+ { NULL, NULL},
+};
+
+/*
+ * Handle different fixed counter encodings between JSON and perf.
+ */
+static char *real_event(const char *name, char *event)
+{
+ int i;
+
+ for (i = 0; fixed[i].name; i++)
+ if (!strcasecmp(name, fixed[i].name))
+ return (char *)fixed[i].event;
+ return event;
+}
+
+/* Call func with each event in the json file */
+int json_events(const char *fn,
+ int (*func)(void *data, char *name, char *event, char *desc,
+ char *long_desc),
+ void *data)
+{
+ int err = -EIO;
+ size_t size;
+ jsmntok_t *tokens, *tok;
+ int i, j, len;
+ char *map;
+
+ if (!fn)
+ return -ENOENT;
+
+ tokens = parse_json(fn, &map, &size, &len);
+ if (!tokens)
+ return -EIO;
+ EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array");
+ tok = tokens + 1;
+ for (i = 0; i < tokens->size; i++) {
+ char *event = NULL, *desc = NULL, *name = NULL;
+ char *long_desc = NULL;
+ char *extra_desc = NULL;
+ struct msrmap *msr = NULL;
+ jsmntok_t *msrval = NULL;
+ jsmntok_t *precise = NULL;
+ jsmntok_t *obj = tok++;
+
+ EXPECT(obj->type == JSMN_OBJECT, obj, "expected object");
+ for (j = 0; j < obj->size; j += 2) {
+ jsmntok_t *field, *val;
+ int nz;
+
+ field = tok + j;
+ EXPECT(field->type == JSMN_STRING, tok + j,
+ "Expected field name");
+ val = tok + j + 1;
+ EXPECT(val->type == JSMN_STRING, tok + j + 1,
+ "Expected string value");
+
+ nz = !json_streq(map, val, "0");
+ if (match_field(map, field, nz, &event, val)) {
+ /* ok */
+ } else if (json_streq(map, field, "EventName")) {
+ addfield(map, &name, "", "", val);
+ } else if (json_streq(map, field, "BriefDescription")) {
+ addfield(map, &desc, "", "", val);
+ fixdesc(desc);
+ } else if (json_streq(map, field,
+ "PublicDescription")) {
+ addfield(map, &long_desc, "", "", val);
+ fixdesc(long_desc);
+ } else if (json_streq(map, field, "PEBS") && nz) {
+ precise = val;
+ } else if (json_streq(map, field, "MSRIndex") && nz) {
+ msr = lookup_msr(map, val);
+ } else if (json_streq(map, field, "MSRValue")) {
+ msrval = val;
+ } else if (json_streq(map, field, "Errata") &&
+ !json_streq(map, val, "null")) {
+ addfield(map, &extra_desc, ". ",
+ " Spec update: ", val);
+ } else if (json_streq(map, field, "Data_LA") && nz) {
+ addfield(map, &extra_desc, ". ",
+ " Supports address when precise",
+ NULL);
+ }
+ /* ignore unknown fields */
+ }
+ if (precise && desc && !strstr(desc, "(Precise Event)")) {
+ if (json_streq(map, precise, "2"))
+ addfield(map, &extra_desc, " ",
+ "(Must be precise)", NULL);
+ else
+ addfield(map, &extra_desc, " ",
+ "(Precise event)", NULL);
+ }
+ if (desc && extra_desc)
+ addfield(map, &desc, " ", extra_desc, NULL);
+ if (long_desc && extra_desc)
+ addfield(map, &long_desc, " ", extra_desc, NULL);
+ if (msr != NULL)
+ addfield(map, &event, ",", msr->pname, msrval);
+ fixname(name);
+
+ err = func(data, name, real_event(name, event), desc, long_desc);
+ free(event);
+ free(desc);
+ free(name);
+ free(long_desc);
+ free(extra_desc);
+ if (err)
+ break;
+ tok += j;
+ }
+ EXPECT(tok - tokens == len, tok, "unexpected objects at end");
+ err = 0;
+out_free:
+ free_json(map, size, tokens);
+ return err;
+}
+
+static char *file_name_to_table_name(char *fname)
+{
+ unsigned int i;
+ int n;
+ int c;
+ char *tblname;
+
+ /*
+ * Ensure tablename starts with alphabetic character.
+ * Derive rest of table name from basename of the JSON file,
+ * replacing hyphens and stripping out .json suffix.
+ */
+ n = asprintf(&tblname, "pme_%s", basename(fname));
+ if (n < 0) {
+ pr_info("%s: asprintf() error %s for file %s\n", prog,
+ strerror(errno), fname);
+ return NULL;
+ }
+
+ for (i = 0; i < strlen(tblname); i++) {
+ c = tblname[i];
+
+ if (c == '-')
+ tblname[i] = '_';
+ else if (c == '.') {
+ tblname[i] = '\0';
+ break;
+ } else if (!isalnum(c) && c != '_') {
+ pr_err("%s: Invalid character '%c' in file name %s\n",
+ prog, c, basename(fname));
+ free(tblname);
+ tblname = NULL;
+ break;
+ }
+ }
+
+ return tblname;
+}
+
+static void print_mapping_table_prefix(FILE *outfp)
+{
+ fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n");
+}
+
+static void print_mapping_table_suffix(FILE *outfp)
+{
+ /*
+ * Print the terminating, NULL entry.
+ */
+ fprintf(outfp, "{\n");
+ fprintf(outfp, "\t.cpuid = 0,\n");
+ fprintf(outfp, "\t.version = 0,\n");
+ fprintf(outfp, "\t.type = 0,\n");
+ fprintf(outfp, "\t.table = 0,\n");
+ fprintf(outfp, "},\n");
+
+ /* and finally, the closing curly bracket for the struct */
+ fprintf(outfp, "};\n");
+}
+
+static int process_mapfile(FILE *outfp, char *fpath)
+{
+ int n = 16384;
+ FILE *mapfp;
+ char *save = NULL;
+ char *line, *p;
+ int line_num;
+ char *tblname;
+
+ pr_info("%s: Processing mapfile %s\n", prog, fpath);
+
+ line = malloc(n);
+ if (!line)
+ return -1;
+
+ mapfp = fopen(fpath, "r");
+ if (!mapfp) {
+ pr_info("%s: Error %s opening %s\n", prog, strerror(errno),
+ fpath);
+ return -1;
+ }
+
+ print_mapping_table_prefix(outfp);
+
+ /* Skip first line (header) */
+ p = fgets(line, n, mapfp);
+ if (!p)
+ goto out;
+
+ line_num = 1;
+ while (1) {
+ char *cpuid, *version, *type, *fname;
+
+ line_num++;
+ p = fgets(line, n, mapfp);
+ if (!p)
+ break;
+
+ if (line[0] == '#' || line[0] == '\n')
+ continue;
+
+ if (line[strlen(line)-1] != '\n') {
+ /* TODO Deal with lines longer than 16K */
+ pr_info("%s: Mapfile %s: line %d too long, aborting\n",
+ prog, fpath, line_num);
+ return -1;
+ }
+ line[strlen(line)-1] = '\0';
+
+ cpuid = strtok_r(p, ",", &save);
+ version = strtok_r(NULL, ",", &save);
+ fname = strtok_r(NULL, ",", &save);
+ type = strtok_r(NULL, ",", &save);
+
+ tblname = file_name_to_table_name(fname);
+ fprintf(outfp, "{\n");
+ fprintf(outfp, "\t.cpuid = \"%s\",\n", cpuid);
+ fprintf(outfp, "\t.version = \"%s\",\n", version);
+ fprintf(outfp, "\t.type = \"%s\",\n", type);
+
+ /*
+ * CHECK: We can't use the type (eg "core") field in the
+ * table name. For us to do that, we need to somehow tweak
+ * the other caller of file_name_to_table(), process_json()
+ * to determine the type. process_json() file has no way
+ * of knowing these are "core" events unless file name has
+ * core in it. If filename has core in it, we can safely
+ * ignore the type field here also.
+ */
+ fprintf(outfp, "\t.table = %s\n", tblname);
+ fprintf(outfp, "},\n");
+ }
+
+out:
+ print_mapping_table_suffix(outfp);
+ return 0;
+}
+
+/*
+ * If we fail to locate/process JSON and map files, create a NULL mapping
+ * table. This would at least allow perf to build even if we can't find/use
+ * the aliases.
+ */
+static void create_empty_mapping(const char *output_file)
+{
+ FILE *outfp;
+
+ pr_info("%s: Creating empty pmu_events_map[] table\n", prog);
+
+ /* Truncate file to clear any partial writes to it */
+ outfp = fopen(output_file, "w");
+ if (!outfp) {
+ perror("fopen()");
+ _Exit(1);
+ }
+
+ fprintf(outfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+ print_mapping_table_prefix(outfp);
+ print_mapping_table_suffix(outfp);
+ fclose(outfp);
+}
+
+static int get_maxfds(void)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim) == 0)
+ return min((int)rlim.rlim_max / 2, 512);
+
+ return 512;
+}
+
+/*
+ * nftw() doesn't let us pass an argument to the processing function,
+ * so use a global variables.
+ */
+static FILE *eventsfp;
+static char *mapfile;
+
+static int process_one_file(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
+{
+ char *tblname, *bname = (char *) fpath + ftwbuf->base;
+ int is_dir = typeflag == FTW_D;
+ int is_file = typeflag == FTW_F;
+ int level = ftwbuf->level;
+ int err = 0;
+
+ pr_debug("%s %d %7jd %-20s %s\n",
+ is_file ? "f" : is_dir ? "d" : "x",
+ level, sb->st_size, bname, fpath);
+
+ /* base dir */
+ if (level == 0)
+ return 0;
+
+ /* model directory, reset topic */
+ if (level == 1 && is_dir) {
+ if (close_table)
+ print_events_table_suffix(eventsfp);
+
+ /*
+ * Drop file name suffix. Replace hyphens with underscores.
+ * Fail if file name contains any alphanum characters besides
+ * underscores.
+ */
+ tblname = file_name_to_table_name(bname);
+ if (!tblname) {
+ pr_info("%s: Error determining table name for %s\n", prog,
+ bname);
+ return -1;
+ }
+
+ print_events_table_prefix(eventsfp, tblname);
+ return 0;
+ }
+
+ /*
+ * Save the mapfile name for now. We will process mapfile
+ * after processing all JSON files (so we can write out the
+ * mapping table after all PMU events tables).
+ *
+ * TODO: Allow for multiple mapfiles? Punt for now.
+ */
+ if (level == 1 && is_file) {
+ if (!strncmp(bname, "mapfile.csv", 11)) {
+ if (mapfile) {
+ pr_info("%s: Many mapfiles? Using %s, ignoring %s\n",
+ prog, mapfile, fpath);
+ } else {
+ mapfile = strdup(fpath);
+ }
+ return 0;
+ }
+
+ pr_info("%s: Ignoring file %s\n", prog, fpath);
+ return 0;
+ }
+
+ /*
+ * If the file name does not have a .json extension,
+ * ignore it. It could be a readme.txt for instance.
+ */
+ if (is_file) {
+ char *suffix = bname + strlen(bname) - 5;
+
+ if (strncmp(suffix, ".json", 5)) {
+ pr_info("%s: Ignoring file without .json suffix %s\n", prog,
+ fpath);
+ return 0;
+ }
+ }
+
+ if (level > 1 && add_topic(level, bname))
+ return -ENOMEM;
+
+ /*
+ * Assume all other files are JSON files.
+ *
+ * If mapfile refers to 'power7_core.json', we create a table
+ * named 'power7_core'. Any inconsistencies between the mapfile
+ * and directory tree could result in build failure due to table
+ * names not being found.
+ *
+ * Atleast for now, be strict with processing JSON file names.
+ * i.e. if JSON file name cannot be mapped to C-style table name,
+ * fail.
+ */
+ if (is_file) {
+ struct perf_entry_data data = {
+ .topic = get_topic(),
+ .outfp = eventsfp,
+ };
+
+ err = json_events(fpath, print_events_table_entry, &data);
+
+ free(data.topic);
+ }
+
+ return err;
+}
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+/*
+ * Starting in directory 'start_dirname', find the "mapfile.csv" and
+ * the set of JSON files for the architecture 'arch'.
+ *
+ * From each JSON file, create a C-style "PMU events table" from the
+ * JSON file (see struct pmu_event).
+ *
+ * From the mapfile, create a mapping between the CPU revisions and
+ * PMU event tables (see struct pmu_events_map).
+ *
+ * Write out the PMU events tables and the mapping table to pmu-event.c.
+ *
+ * If unable to process the JSON or arch files, create an empty mapping
+ * table so we can continue to build/use perf even if we cannot use the
+ * PMU event aliases.
+ */
+int main(int argc, char *argv[])
+{
+ int rc;
+ int maxfds;
+ char ldirname[PATH_MAX];
+
+ const char *arch;
+ const char *output_file;
+ const char *start_dirname;
+
+ prog = basename(argv[0]);
+ if (argc < 4) {
+ pr_err("Usage: %s <arch> <starting_dir> <output_file>\n", prog);
+ return 1;
+ }
+
+ arch = argv[1];
+ start_dirname = argv[2];
+ output_file = argv[3];
+
+ if (argc > 4)
+ verbose = atoi(argv[4]);
+
+ eventsfp = fopen(output_file, "w");
+ if (!eventsfp) {
+ pr_err("%s Unable to create required file %s (%s)\n",
+ prog, output_file, strerror(errno));
+ return 2;
+ }
+
+ /* Include pmu-events.h first */
+ fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+
+ sprintf(ldirname, "%s/%s", start_dirname, arch);
+
+ /*
+ * The mapfile allows multiple CPUids to point to the same JSON file,
+ * so, not sure if there is a need for symlinks within the pmu-events
+ * directory.
+ *
+ * For now, treat symlinks of JSON files as regular files and create
+ * separate tables for each symlink (presumably, each symlink refers
+ * to specific version of the CPU).
+ */
+
+ maxfds = get_maxfds();
+ mapfile = NULL;
+ rc = nftw(ldirname, process_one_file, maxfds, 0);
+ if (rc && verbose) {
+ pr_info("%s: Error walking file tree %s\n", prog, ldirname);
+ goto empty_map;
+ } else if (rc) {
+ goto empty_map;
+ }
+
+ if (close_table)
+ print_events_table_suffix(eventsfp);
+
+ if (!mapfile) {
+ pr_info("%s: No CPU->JSON mapping?\n", prog);
+ goto empty_map;
+ }
+
+ if (process_mapfile(eventsfp, mapfile)) {
+ pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
+ goto empty_map;
+ }
+
+ return 0;
+
+empty_map:
+ fclose(eventsfp);
+ create_empty_mapping(output_file);
+ return 0;
+}
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
new file mode 100644
index 0000000..b0eb274
--- /dev/null
+++ b/tools/perf/pmu-events/jevents.h
@@ -0,0 +1,18 @@
+#ifndef JEVENTS_H
+#define JEVENTS_H 1
+
+int json_events(const char *fn,
+ int (*func)(void *data, char *name, char *event, char *desc,
+ char *long_desc),
+ void *data);
+char *get_cpu_str(void);
+
+#ifndef min
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#endif
diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c
new file mode 100644
index 0000000..11d1fa1
--- /dev/null
+++ b/tools/perf/pmu-events/jsmn.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2010 Serge A. Zaitsev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Slightly modified by AK to not assume 0 terminated input.
+ */
+
+#include <stdlib.h>
+#include "jsmn.h"
+
+/*
+ * Allocates a fresh unused token from the token pool.
+ */
+static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser,
+ jsmntok_t *tokens, size_t num_tokens)
+{
+ jsmntok_t *tok;
+
+ if ((unsigned)parser->toknext >= num_tokens)
+ return NULL;
+ tok = &tokens[parser->toknext++];
+ tok->start = tok->end = -1;
+ tok->size = 0;
+ return tok;
+}
+
+/*
+ * Fills token type and boundaries.
+ */
+static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type,
+ int start, int end)
+{
+ token->type = type;
+ token->start = start;
+ token->end = end;
+ token->size = 0;
+}
+
+/*
+ * Fills next available token with JSON primitive.
+ */
+static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js,
+ size_t len,
+ jsmntok_t *tokens, size_t num_tokens)
+{
+ jsmntok_t *token;
+ int start;
+
+ start = parser->pos;
+
+ for (; parser->pos < len; parser->pos++) {
+ switch (js[parser->pos]) {
+#ifndef JSMN_STRICT
+ /*
+ * In strict mode primitive must be followed by ","
+ * or "}" or "]"
+ */
+ case ':':
+#endif
+ case '\t':
+ case '\r':
+ case '\n':
+ case ' ':
+ case ',':
+ case ']':
+ case '}':
+ goto found;
+ default:
+ break;
+ }
+ if (js[parser->pos] < 32 || js[parser->pos] >= 127) {
+ parser->pos = start;
+ return JSMN_ERROR_INVAL;
+ }
+ }
+#ifdef JSMN_STRICT
+ /*
+ * In strict mode primitive must be followed by a
+ * comma/object/array.
+ */
+ parser->pos = start;
+ return JSMN_ERROR_PART;
+#endif
+
+found:
+ token = jsmn_alloc_token(parser, tokens, num_tokens);
+ if (token == NULL) {
+ parser->pos = start;
+ return JSMN_ERROR_NOMEM;
+ }
+ jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos);
+ parser->pos--; /* parent sees closing brackets */
+ return JSMN_SUCCESS;
+}
+
+/*
+ * Fills next token with JSON string.
+ */
+static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js,
+ size_t len,
+ jsmntok_t *tokens, size_t num_tokens)
+{
+ jsmntok_t *token;
+ int start = parser->pos;
+
+ /* Skip starting quote */
+ parser->pos++;
+
+ for (; parser->pos < len; parser->pos++) {
+ char c = js[parser->pos];
+
+ /* Quote: end of string */
+ if (c == '\"') {
+ token = jsmn_alloc_token(parser, tokens, num_tokens);
+ if (token == NULL) {
+ parser->pos = start;
+ return JSMN_ERROR_NOMEM;
+ }
+ jsmn_fill_token(token, JSMN_STRING, start+1,
+ parser->pos);
+ return JSMN_SUCCESS;
+ }
+
+ /* Backslash: Quoted symbol expected */
+ if (c == '\\') {
+ parser->pos++;
+ switch (js[parser->pos]) {
+ /* Allowed escaped symbols */
+ case '\"':
+ case '/':
+ case '\\':
+ case 'b':
+ case 'f':
+ case 'r':
+ case 'n':
+ case 't':
+ break;
+ /* Allows escaped symbol \uXXXX */
+ case 'u':
+ /* TODO */
+ break;
+ /* Unexpected symbol */
+ default:
+ parser->pos = start;
+ return JSMN_ERROR_INVAL;
+ }
+ }
+ }
+ parser->pos = start;
+ return JSMN_ERROR_PART;
+}
+
+/*
+ * Parse JSON string and fill tokens.
+ */
+jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
+ jsmntok_t *tokens, unsigned int num_tokens)
+{
+ jsmnerr_t r;
+ int i;
+ jsmntok_t *token;
+
+ for (; parser->pos < len; parser->pos++) {
+ char c;
+ jsmntype_t type;
+
+ c = js[parser->pos];
+ switch (c) {
+ case '{':
+ case '[':
+ token = jsmn_alloc_token(parser, tokens, num_tokens);
+ if (token == NULL)
+ return JSMN_ERROR_NOMEM;
+ if (parser->toksuper != -1)
+ tokens[parser->toksuper].size++;
+ token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY);
+ token->start = parser->pos;
+ parser->toksuper = parser->toknext - 1;
+ break;
+ case '}':
+ case ']':
+ type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY);
+ for (i = parser->toknext - 1; i >= 0; i--) {
+ token = &tokens[i];
+ if (token->start != -1 && token->end == -1) {
+ if (token->type != type)
+ return JSMN_ERROR_INVAL;
+ parser->toksuper = -1;
+ token->end = parser->pos + 1;
+ break;
+ }
+ }
+ /* Error if unmatched closing bracket */
+ if (i == -1)
+ return JSMN_ERROR_INVAL;
+ for (; i >= 0; i--) {
+ token = &tokens[i];
+ if (token->start != -1 && token->end == -1) {
+ parser->toksuper = i;
+ break;
+ }
+ }
+ break;
+ case '\"':
+ r = jsmn_parse_string(parser, js, len, tokens,
+ num_tokens);
+ if (r < 0)
+ return r;
+ if (parser->toksuper != -1)
+ tokens[parser->toksuper].size++;
+ break;
+ case '\t':
+ case '\r':
+ case '\n':
+ case ':':
+ case ',':
+ case ' ':
+ break;
+#ifdef JSMN_STRICT
+ /*
+ * In strict mode primitives are:
+ * numbers and booleans.
+ */
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case 't':
+ case 'f':
+ case 'n':
+#else
+ /*
+ * In non-strict mode every unquoted value
+ * is a primitive.
+ */
+ /*FALL THROUGH */
+ default:
+#endif
+ r = jsmn_parse_primitive(parser, js, len, tokens,
+ num_tokens);
+ if (r < 0)
+ return r;
+ if (parser->toksuper != -1)
+ tokens[parser->toksuper].size++;
+ break;
+
+#ifdef JSMN_STRICT
+ /* Unexpected char in strict mode */
+ default:
+ return JSMN_ERROR_INVAL;
+#endif
+ }
+ }
+
+ for (i = parser->toknext - 1; i >= 0; i--) {
+ /* Unmatched opened object or array */
+ if (tokens[i].start != -1 && tokens[i].end == -1)
+ return JSMN_ERROR_PART;
+ }
+
+ return JSMN_SUCCESS;
+}
+
+/*
+ * Creates a new parser based over a given buffer with an array of tokens
+ * available.
+ */
+void jsmn_init(jsmn_parser *parser)
+{
+ parser->pos = 0;
+ parser->toknext = 0;
+ parser->toksuper = -1;
+}
+
+const char *jsmn_strerror(jsmnerr_t err)
+{
+ switch (err) {
+ case JSMN_ERROR_NOMEM:
+ return "No enough tokens";
+ case JSMN_ERROR_INVAL:
+ return "Invalid character inside JSON string";
+ case JSMN_ERROR_PART:
+ return "The string is not a full JSON packet, more bytes expected";
+ case JSMN_SUCCESS:
+ return "Success";
+ default:
+ return "Unknown json error";
+ }
+}
diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h
new file mode 100644
index 0000000..d666b10
--- /dev/null
+++ b/tools/perf/pmu-events/jsmn.h
@@ -0,0 +1,67 @@
+#ifndef __JSMN_H_
+#define __JSMN_H_
+
+/*
+ * JSON type identifier. Basic types are:
+ * o Object
+ * o Array
+ * o String
+ * o Other primitive: number, boolean (true/false) or null
+ */
+typedef enum {
+ JSMN_PRIMITIVE = 0,
+ JSMN_OBJECT = 1,
+ JSMN_ARRAY = 2,
+ JSMN_STRING = 3
+} jsmntype_t;
+
+typedef enum {
+ /* Not enough tokens were provided */
+ JSMN_ERROR_NOMEM = -1,
+ /* Invalid character inside JSON string */
+ JSMN_ERROR_INVAL = -2,
+ /* The string is not a full JSON packet, more bytes expected */
+ JSMN_ERROR_PART = -3,
+ /* Everything was fine */
+ JSMN_SUCCESS = 0
+} jsmnerr_t;
+
+/*
+ * JSON token description.
+ * @param type type (object, array, string etc.)
+ * @param start start position in JSON data string
+ * @param end end position in JSON data string
+ */
+typedef struct {
+ jsmntype_t type;
+ int start;
+ int end;
+ int size;
+} jsmntok_t;
+
+/*
+ * JSON parser. Contains an array of token blocks available. Also stores
+ * the string being parsed now and current position in that string
+ */
+typedef struct {
+ unsigned int pos; /* offset in the JSON string */
+ int toknext; /* next token to allocate */
+ int toksuper; /* superior token node, e.g parent object or array */
+} jsmn_parser;
+
+/*
+ * Create JSON parser over an array of tokens
+ */
+void jsmn_init(jsmn_parser *parser);
+
+/*
+ * Run JSON parser. It parses a JSON data string into and array of tokens,
+ * each describing a single JSON object.
+ */
+jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js,
+ size_t len,
+ jsmntok_t *tokens, unsigned int num_tokens);
+
+const char *jsmn_strerror(jsmnerr_t err);
+
+#endif /* __JSMN_H_ */
diff --git a/tools/perf/pmu-events/json.c b/tools/perf/pmu-events/json.c
new file mode 100644
index 0000000..f67bbb0
--- /dev/null
+++ b/tools/perf/pmu-events/json.c
@@ -0,0 +1,162 @@
+/* Parse JSON files using the JSMN parser. */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include "jsmn.h"
+#include "json.h"
+#include <linux/kernel.h>
+
+
+static char *mapfile(const char *fn, size_t *size)
+{
+ unsigned ps = sysconf(_SC_PAGESIZE);
+ struct stat st;
+ char *map = NULL;
+ int err;
+ int fd = open(fn, O_RDONLY);
+
+ if (fd < 0 && verbose && fn) {
+ pr_err("Error opening events file '%s': %s\n", fn,
+ strerror(errno));
+ }
+
+ if (fd < 0)
+ return NULL;
+ err = fstat(fd, &st);
+ if (err < 0)
+ goto out;
+ *size = st.st_size;
+ map = mmap(NULL,
+ (st.st_size + ps - 1) & ~(ps - 1),
+ PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (map == MAP_FAILED)
+ map = NULL;
+out:
+ close(fd);
+ return map;
+}
+
+static void unmapfile(char *map, size_t size)
+{
+ unsigned ps = sysconf(_SC_PAGESIZE);
+ munmap(map, roundup(size, ps));
+}
+
+/*
+ * Parse json file using jsmn. Return array of tokens,
+ * and mapped file. Caller needs to free array.
+ */
+jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len)
+{
+ jsmn_parser parser;
+ jsmntok_t *tokens;
+ jsmnerr_t res;
+ unsigned sz;
+
+ *map = mapfile(fn, size);
+ if (!*map)
+ return NULL;
+ /* Heuristic */
+ sz = *size * 16;
+ tokens = malloc(sz);
+ if (!tokens)
+ goto error;
+ jsmn_init(&parser);
+ res = jsmn_parse(&parser, *map, *size, tokens,
+ sz / sizeof(jsmntok_t));
+ if (res != JSMN_SUCCESS) {
+ pr_err("%s: json error %s\n", fn, jsmn_strerror(res));
+ goto error_free;
+ }
+ if (len)
+ *len = parser.toknext;
+ return tokens;
+error_free:
+ free(tokens);
+error:
+ unmapfile(*map, *size);
+ return NULL;
+}
+
+void free_json(char *map, size_t size, jsmntok_t *tokens)
+{
+ free(tokens);
+ unmapfile(map, size);
+}
+
+static int countchar(char *map, char c, int end)
+{
+ int i;
+ int count = 0;
+ for (i = 0; i < end; i++)
+ if (map[i] == c)
+ count++;
+ return count;
+}
+
+/* Return line number of a jsmn token */
+int json_line(char *map, jsmntok_t *t)
+{
+ return countchar(map, '\n', t->start) + 1;
+}
+
+static const char * const jsmn_types[] = {
+ [JSMN_PRIMITIVE] = "primitive",
+ [JSMN_ARRAY] = "array",
+ [JSMN_OBJECT] = "object",
+ [JSMN_STRING] = "string"
+};
+
+#define LOOKUP(a, i) ((i) < (sizeof(a)/sizeof(*(a))) ? ((a)[i]) : "?")
+
+/* Return type name of a jsmn token */
+const char *json_name(jsmntok_t *t)
+{
+ return LOOKUP(jsmn_types, t->type);
+}
+
+int json_len(jsmntok_t *t)
+{
+ return t->end - t->start;
+}
+
+/* Is string t equal to s? */
+int json_streq(char *map, jsmntok_t *t, const char *s)
+{
+ unsigned len = json_len(t);
+ return len == strlen(s) && !strncasecmp(map + t->start, s, len);
+}
diff --git a/tools/perf/pmu-events/json.h b/tools/perf/pmu-events/json.h
new file mode 100644
index 0000000..278ebd3
--- /dev/null
+++ b/tools/perf/pmu-events/json.h
@@ -0,0 +1,38 @@
+#ifndef JSON_H
+#define JSON_H 1
+
+#include "jsmn.h"
+
+jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len);
+void free_json(char *map, size_t size, jsmntok_t *tokens);
+int json_line(char *map, jsmntok_t *t);
+const char *json_name(jsmntok_t *t);
+int json_streq(char *map, jsmntok_t *t, const char *s);
+int json_len(jsmntok_t *t);
+
+extern int verbose;
+
+#include <stdbool.h>
+
+extern int eprintf(int level, int var, const char *fmt, ...);
+#define pr_fmt(fmt) fmt
+
+#define pr_err(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_info(fmt, ...) \
+ eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_debug(fmt, ...) \
+ eprintf(2, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#ifndef roundup
+#define roundup(x, y) ( \
+{ \
+ const typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+#endif
+
+#endif
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
new file mode 100644
index 0000000..2eaef59
--- /dev/null
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -0,0 +1,37 @@
+#ifndef PMU_EVENTS_H
+#define PMU_EVENTS_H
+
+/*
+ * Describe each PMU event. Each CPU has a table of PMU events.
+ */
+struct pmu_event {
+ const char *name;
+ const char *event;
+ const char *desc;
+ const char *topic;
+ const char *long_desc;
+};
+
+/*
+ *
+ * Map a CPU to its table of PMU events. The CPU is identified by the
+ * cpuid field, which is an arch-specific identifier for the CPU.
+ * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
+ * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c)
+ *
+ * The cpuid can contain any character other than the comma.
+ */
+struct pmu_events_map {
+ const char *cpuid;
+ const char *version;
+ const char *type; /* core, uncore etc */
+ struct pmu_event *table;
+};
+
+/*
+ * Global table mapping each known CPU for the architecture to its
+ * table of PMU events.
+ */
+extern struct pmu_events_map pmu_events_map[];
+
+#endif
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ea34c5a..d92e020 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -384,15 +384,14 @@ void perf_evlist__toggle_enable(struct perf_evlist *evlist)
static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
struct perf_evsel *evsel, int cpu)
{
- int thread, err;
+ int thread;
int nr_threads = perf_evlist__nr_threads(evlist, evsel);
if (!evsel->fd)
return -EINVAL;
for (thread = 0; thread < nr_threads; thread++) {
- err = ioctl(FD(evsel, cpu, thread),
- PERF_EVENT_IOC_ENABLE, 0);
+ int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
if (err)
return err;
}
@@ -403,14 +402,14 @@ static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
struct perf_evsel *evsel,
int thread)
{
- int cpu, err;
+ int cpu;
int nr_cpus = cpu_map__nr(evlist->cpus);
if (!evsel->fd)
return -EINVAL;
for (cpu = 0; cpu < nr_cpus; cpu++) {
- err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
+ int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
if (err)
return err;
}
@@ -1606,10 +1605,9 @@ void perf_evlist__close(struct perf_evlist *evlist)
struct perf_evsel *evsel;
int ncpus = cpu_map__nr(evlist->cpus);
int nthreads = thread_map__nr(evlist->threads);
- int n;
evlist__for_each_entry_reverse(evlist, evsel) {
- n = evsel->cpus ? evsel->cpus->nr : ncpus;
+ int n = evsel->cpus ? evsel->cpus->nr : ncpus;
perf_evsel__close(evsel, n, nthreads);
}
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 380e84c..8bc2711 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -985,14 +985,13 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
- int cpu, thread;
-
if (evsel->system_wide)
nthreads = 1;
evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
if (evsel->fd) {
+ int cpu, thread;
for (cpu = 0; cpu < ncpus; cpu++) {
for (thread = 0; thread < nthreads; thread++) {
FD(evsel, cpu, thread) = -1;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index d306ca1..d30109b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -151,4 +151,5 @@ int write_padded(int fd, const void *bf, size_t count, size_t count_aligned);
*/
int get_cpuid(char *buffer, size_t sz);
+char *get_cpuid_str(void);
#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 7591a0c..16c06d3 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -90,6 +90,7 @@ struct intel_pt_decoder {
bool pge;
bool have_tma;
bool have_cyc;
+ bool fixup_last_mtc;
uint64_t pos;
uint64_t last_ip;
uint64_t ip;
@@ -586,10 +587,31 @@ struct intel_pt_calc_cyc_to_tsc_info {
uint64_t tsc_timestamp;
uint64_t timestamp;
bool have_tma;
+ bool fixup_last_mtc;
bool from_mtc;
double cbr_cyc_to_tsc;
};
+/*
+ * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower
+ * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
+ * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
+ * packet by copying the missing bits from the current MTC assuming the least
+ * difference between the two, and that the current MTC comes after last_mtc.
+ */
+static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
+ uint32_t *last_mtc)
+{
+ uint32_t first_missing_bit = 1U << (16 - mtc_shift);
+ uint32_t mask = ~(first_missing_bit - 1);
+
+ *last_mtc |= mtc & mask;
+ if (*last_mtc >= mtc) {
+ *last_mtc -= first_missing_bit;
+ *last_mtc &= 0xff;
+ }
+}
+
static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
{
struct intel_pt_decoder *decoder = pkt_info->decoder;
@@ -619,6 +641,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
return 0;
mtc = pkt_info->packet.payload;
+ if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
+ data->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &data->last_mtc);
+ }
if (mtc > data->last_mtc)
mtc_delta = mtc - data->last_mtc;
else
@@ -687,6 +714,7 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
data->ctc_delta = 0;
data->have_tma = true;
+ data->fixup_last_mtc = true;
return 0;
@@ -753,6 +781,7 @@ static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
.tsc_timestamp = decoder->tsc_timestamp,
.timestamp = decoder->timestamp,
.have_tma = decoder->have_tma,
+ .fixup_last_mtc = decoder->fixup_last_mtc,
.from_mtc = from_mtc,
.cbr_cyc_to_tsc = 0,
};
@@ -1271,6 +1300,7 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
}
decoder->ctc_delta = 0;
decoder->have_tma = true;
+ decoder->fixup_last_mtc = true;
intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
}
@@ -1285,6 +1315,12 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
mtc = decoder->packet.payload;
+ if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
+ decoder->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &decoder->last_mtc);
+ }
+
if (mtc > decoder->last_mtc)
mtc_delta = mtc - decoder->last_mtc;
else
@@ -1353,6 +1389,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
timestamp, decoder->timestamp);
else
decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
}
/* Walk PSB+ packets when already in sync. */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 18e4519..df85b9e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1745,9 +1745,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
int max_stack)
{
struct ip_callchain *chain = sample->callchain;
- int chain_nr = min(max_stack, (int)chain->nr);
+ int chain_nr = min(max_stack, (int)chain->nr), i;
u8 cpumode = PERF_RECORD_MISC_USER;
- int i, j, err;
u64 ip;
for (i = 0; i < chain_nr; i++) {
@@ -1758,7 +1757,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* LBR only affects the user callchain */
if (i != chain_nr) {
struct branch_stack *lbr_stack = sample->branch_stack;
- int lbr_nr = lbr_stack->nr;
+ int lbr_nr = lbr_stack->nr, j;
/*
* LBR callstack can only get user call chain.
* The mix_chain_nr is kernel call chain
@@ -1772,6 +1771,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
int mix_chain_nr = i + 1 + lbr_nr + 1;
for (j = 0; j < mix_chain_nr; j++) {
+ int err;
if (callchain_param.order == ORDER_CALLEE) {
if (j < i + 1)
ip = chain->ips[j];
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 33546c3..4e778ea 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -924,6 +924,7 @@ config_term_avail(int term_type, struct parse_events_error *err)
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
case PARSE_EVENTS__TERM_TYPE_CONFIG2:
case PARSE_EVENTS__TERM_TYPE_NAME:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
return true;
default:
if (!err)
@@ -1458,7 +1459,7 @@ comp_pmu(const void *p1, const void *p2)
struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1;
struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2;
- return strcmp(pmu1->symbol, pmu2->symbol);
+ return strcasecmp(pmu1->symbol, pmu2->symbol);
}
static void perf_pmu__parse_cleanup(void)
@@ -2263,7 +2264,8 @@ out_enomem:
/*
* Print the help text for the event symbols:
*/
-void print_events(const char *event_glob, bool name_only)
+void print_events(const char *event_glob, bool name_only, bool quiet_flag,
+ bool long_desc)
{
print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
@@ -2273,7 +2275,7 @@ void print_events(const char *event_glob, bool name_only)
print_hwcache_events(event_glob, name_only);
- print_pmu_events(event_glob, name_only);
+ print_pmu_events(event_glob, name_only, quiet_flag, long_desc);
if (event_glob != NULL)
return;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 8d09a97..da246a3 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -172,7 +172,8 @@ void parse_events_update_lists(struct list_head *list_event,
void parse_events_evlist_error(struct parse_events_evlist *data,
int idx, const char *str);
-void print_events(const char *event_glob, bool name_only);
+void print_events(const char *event_glob, bool name_only, bool quiet,
+ bool long_desc);
struct event_symbol {
const char *symbol;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 2babcdf..b1474dc 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -12,6 +12,9 @@
#include "pmu.h"
#include "parse-events.h"
#include "cpumap.h"
+#include "header.h"
+#include "pmu-events/pmu-events.h"
+#include "cache.h"
struct perf_pmu_format {
char *name;
@@ -220,7 +223,8 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
}
static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
- char *desc __maybe_unused, char *val)
+ char *desc, char *val, char *long_desc,
+ char *topic)
{
struct perf_pmu_alias *alias;
int ret;
@@ -253,6 +257,11 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
perf_pmu__parse_snapshot(alias, dir, name);
}
+ alias->desc = desc ? strdup(desc) : NULL;
+ alias->long_desc = long_desc ? strdup(long_desc) :
+ desc ? strdup(desc) : NULL;
+ alias->topic = topic ? strdup(topic) : NULL;
+
list_add_tail(&alias->list, list);
return 0;
@@ -269,7 +278,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI
buf[ret] = 0;
- return __perf_pmu__new_alias(list, dir, name, NULL, buf);
+ return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL);
}
static inline bool pmu_alias_info_file(char *name)
@@ -473,6 +482,68 @@ static struct cpu_map *pmu_cpumask(const char *name)
return cpus;
}
+/*
+ * Return the CPU id as a raw string.
+ *
+ * Each architecture should provide a more precise id string that
+ * can be use to match the architecture's "mapfile".
+ */
+char * __weak get_cpuid_str(void)
+{
+ return NULL;
+}
+
+/*
+ * From the pmu_events_map, find the table of PMU events that corresponds
+ * to the current running CPU. Then, add all PMU events from that table
+ * as aliases.
+ */
+static void pmu_add_cpu_aliases(struct list_head *head)
+{
+ int i;
+ struct pmu_events_map *map;
+ struct pmu_event *pe;
+ char *cpuid;
+
+ cpuid = getenv("PERF_CPUID");
+ if (cpuid)
+ cpuid = strdup(cpuid);
+ if (!cpuid)
+ cpuid = get_cpuid_str();
+ if (!cpuid)
+ return;
+
+ pr_debug("Using CPUID %s\n", cpuid);
+
+ i = 0;
+ while (1) {
+ map = &pmu_events_map[i++];
+ if (!map->table)
+ goto out;
+
+ if (!strcmp(map->cpuid, cpuid))
+ break;
+ }
+
+ /*
+ * Found a matching PMU events table. Create aliases
+ */
+ i = 0;
+ while (1) {
+ pe = &map->table[i++];
+ if (!pe->name)
+ break;
+
+ /* need type casts to override 'const' */
+ __perf_pmu__new_alias(head, NULL, (char *)pe->name,
+ (char *)pe->desc, (char *)pe->event,
+ (char *)pe->long_desc, (char *)pe->topic);
+ }
+
+out:
+ free(cpuid);
+}
+
struct perf_event_attr * __weak
perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
@@ -497,6 +568,9 @@ static struct perf_pmu *pmu_lookup(const char *name)
if (pmu_aliases(name, &aliases))
return NULL;
+ if (!strcmp(name, "cpu"))
+ pmu_add_cpu_aliases(&aliases);
+
if (pmu_type(name, &type))
return NULL;
@@ -983,21 +1057,63 @@ static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
return buf;
}
-static int cmp_string(const void *a, const void *b)
+struct sevent {
+ char *name;
+ char *desc;
+ char *topic;
+};
+
+static int cmp_sevent(const void *a, const void *b)
{
- const char * const *as = a;
- const char * const *bs = b;
- return strcmp(*as, *bs);
+ const struct sevent *as = a;
+ const struct sevent *bs = b;
+
+ /* Put extra events last */
+ if (!!as->desc != !!bs->desc)
+ return !!as->desc - !!bs->desc;
+ if (as->topic && bs->topic) {
+ int n = strcmp(as->topic, bs->topic);
+
+ if (n)
+ return n;
+ }
+ return strcmp(as->name, bs->name);
}
-void print_pmu_events(const char *event_glob, bool name_only)
+static void wordwrap(char *s, int start, int max, int corr)
+{
+ int column = start;
+ int n;
+
+ while (*s) {
+ int wlen = strcspn(s, " \t");
+
+ if (column + wlen >= max && column > start) {
+ printf("\n%*s", start, "");
+ column = start + corr;
+ }
+ n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+ if (n <= 0)
+ break;
+ s += wlen;
+ column += n;
+ while (isspace(*s))
+ s++;
+ }
+}
+
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
+ bool long_desc)
{
struct perf_pmu *pmu;
struct perf_pmu_alias *alias;
char buf[1024];
int printed = 0;
int len, j;
- char **aliases;
+ struct sevent *aliases;
+ int numdesc = 0;
+ int columns = pager_get_columns();
+ char *topic = NULL;
pmu = NULL;
len = 0;
@@ -1007,14 +1123,15 @@ void print_pmu_events(const char *event_glob, bool name_only)
if (pmu->selectable)
len++;
}
- aliases = zalloc(sizeof(char *) * len);
+ aliases = zalloc(sizeof(struct sevent) * len);
if (!aliases)
goto out_enomem;
pmu = NULL;
j = 0;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
list_for_each_entry(alias, &pmu->aliases, list) {
- char *name = format_alias(buf, sizeof(buf), pmu, alias);
+ char *name = alias->desc ? alias->name :
+ format_alias(buf, sizeof(buf), pmu, alias);
bool is_cpu = !strcmp(pmu->name, "cpu");
if (event_glob != NULL &&
@@ -1023,12 +1140,21 @@ void print_pmu_events(const char *event_glob, bool name_only)
event_glob))))
continue;
- if (is_cpu && !name_only)
+ if (is_cpu && !name_only && !alias->desc)
name = format_alias_or(buf, sizeof(buf), pmu, alias);
- aliases[j] = strdup(name);
- if (aliases[j] == NULL)
+ aliases[j].name = name;
+ if (is_cpu && !name_only && !alias->desc)
+ aliases[j].name = format_alias_or(buf,
+ sizeof(buf),
+ pmu, alias);
+ aliases[j].name = strdup(aliases[j].name);
+ if (!aliases[j].name)
goto out_enomem;
+
+ aliases[j].desc = long_desc ? alias->long_desc :
+ alias->desc;
+ aliases[j].topic = alias->topic;
j++;
}
if (pmu->selectable &&
@@ -1036,25 +1162,39 @@ void print_pmu_events(const char *event_glob, bool name_only)
char *s;
if (asprintf(&s, "%s//", pmu->name) < 0)
goto out_enomem;
- aliases[j] = s;
+ aliases[j].name = s;
j++;
}
}
len = j;
- qsort(aliases, len, sizeof(char *), cmp_string);
+ qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
for (j = 0; j < len; j++) {
if (name_only) {
- printf("%s ", aliases[j]);
+ printf("%s ", aliases[j].name);
continue;
}
- printf(" %-50s [Kernel PMU event]\n", aliases[j]);
+ if (aliases[j].desc && !quiet_flag) {
+ if (numdesc++ == 0)
+ printf("\n");
+ if (aliases[j].topic && (!topic ||
+ strcmp(topic, aliases[j].topic))) {
+ printf("%s%s:\n", topic ? "\n" : "",
+ aliases[j].topic);
+ topic = aliases[j].topic;
+ }
+ printf(" %-50s\n", aliases[j].name);
+ printf("%*s", 8, "[");
+ wordwrap(aliases[j].desc, 8, columns, 0);
+ printf("]\n");
+ } else
+ printf(" %-50s [Kernel PMU event]\n", aliases[j].name);
printed++;
}
if (printed && pager_in_use())
printf("\n");
out_free:
for (j = 0; j < len; j++)
- zfree(&aliases[j]);
+ zfree(&aliases[j].name);
zfree(&aliases);
return;
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 743422a..2571203 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -40,6 +40,9 @@ struct perf_pmu_info {
struct perf_pmu_alias {
char *name;
+ char *desc;
+ char *long_desc;
+ char *topic;
struct list_head terms; /* HEAD struct parse_events_term -> list */
struct list_head list; /* ELEM */
char unit[UNIT_MAX_LEN+1];
@@ -71,7 +74,8 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
-void print_pmu_events(const char *event_glob, bool name_only);
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
+ bool long_desc);
bool pmu_have_event(const char *pname, const char *name);
int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index fcfbef0..d281ae2 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -213,7 +213,7 @@ static int convert_exec_to_group(const char *exec, char **result)
goto out;
}
- for (ptr2 = ptr1; ptr2 != '\0'; ptr2++) {
+ for (ptr2 = ptr1; *ptr2 != '\0'; ptr2++) {
if (!isalnum(*ptr2) && *ptr2 != '_') {
*ptr2 = '\0';
break;
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index b268a66..318424e 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -66,9 +66,8 @@ static inline ssize_t strbuf_avail(const struct strbuf *sb) {
int strbuf_grow(struct strbuf *buf, size_t);
static inline int strbuf_setlen(struct strbuf *sb, size_t len) {
- int ret;
if (!sb->alloc) {
- ret = strbuf_grow(sb, 0);
+ int ret = strbuf_grow(sb, 0);
if (ret)
return ret;
}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 8b10a55..f5af87f 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -14,13 +14,12 @@
int thread__init_map_groups(struct thread *thread, struct machine *machine)
{
- struct thread *leader;
pid_t pid = thread->pid_;
if (pid == thread->tid || pid == -1) {
thread->mg = map_groups__new(machine);
} else {
- leader = __machine__findnew_thread(machine, pid, pid);
+ struct thread *leader = __machine__findnew_thread(machine, pid, pid);
if (leader) {
thread->mg = map_groups__get(leader->mg);
thread__put(leader);
@@ -130,11 +129,10 @@ int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
bool exec)
{
struct comm *new, *curr = thread__comm(thread);
- int err;
/* Override the default :tid entry */
if (!thread->comm_set) {
- err = comm__override(curr, str, timestamp, exec);
+ int err = comm__override(curr, str, timestamp, exec);
if (err)
return err;
} else {
@@ -270,10 +268,9 @@ static int thread__clone_map_groups(struct thread *thread,
int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
{
- int err;
-
if (parent->comm_set) {
const char *comm = thread__comm_str(parent);
+ int err;
if (!comm)
return -ENOMEM;
err = thread__set_comm(thread, comm, timestamp);
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index ad6dd05..582db951 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -13,6 +13,7 @@ ldflags-y += --wrap=__release_region
ldflags-y += --wrap=devm_memremap_pages
ldflags-y += --wrap=insert_resource
ldflags-y += --wrap=remove_resource
+ldflags-y += --wrap=acpi_evaluate_object
DRIVERS := ../../../drivers
NVDIMM_SRC := $(DRIVERS)/nvdimm
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
index 878daf3..7dc5a0a 100644
--- a/tools/testing/nvdimm/config_check.c
+++ b/tools/testing/nvdimm/config_check.c
@@ -1,4 +1,3 @@
-#include <linux/kconfig.h>
#include <linux/bug.h>
void check(void)
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c29f8dc..3ccef73 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
+#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/mm.h>
#include "nfit_test.h"
@@ -73,7 +74,7 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
if (nfit_res)
return (void __iomem *) nfit_res->buf + offset
- - nfit_res->res->start;
+ - nfit_res->res.start;
return fallback_fn(offset, size);
}
@@ -84,7 +85,7 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
if (nfit_res)
return (void __iomem *) nfit_res->buf + offset
- - nfit_res->res->start;
+ - nfit_res->res.start;
return devm_ioremap_nocache(dev, offset, size);
}
EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
@@ -95,7 +96,7 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
- return nfit_res->buf + offset - nfit_res->res->start;
+ return nfit_res->buf + offset - nfit_res->res.start;
return devm_memremap(dev, offset, size, flags);
}
EXPORT_SYMBOL(__wrap_devm_memremap);
@@ -107,7 +108,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
- return nfit_res->buf + offset - nfit_res->res->start;
+ return nfit_res->buf + offset - nfit_res->res.start;
return devm_memremap_pages(dev, res, ref, altmap);
}
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
@@ -128,7 +129,7 @@ void *__wrap_memremap(resource_size_t offset, size_t size,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
- return nfit_res->buf + offset - nfit_res->res->start;
+ return nfit_res->buf + offset - nfit_res->res.start;
return memremap(offset, size, flags);
}
EXPORT_SYMBOL(__wrap_memremap);
@@ -174,6 +175,63 @@ void __wrap_memunmap(void *addr)
}
EXPORT_SYMBOL(__wrap_memunmap);
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n);
+
+static void nfit_devres_release(struct device *dev, void *data)
+{
+ struct resource *res = *((struct resource **) data);
+
+ WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start,
+ resource_size(res)));
+}
+
+static int match(struct device *dev, void *__res, void *match_data)
+{
+ struct resource *res = *((struct resource **) __res);
+ resource_size_t start = *((resource_size_t *) match_data);
+
+ return res->start == start;
+}
+
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n)
+{
+ if (parent == &iomem_resource) {
+ struct nfit_test_resource *nfit_res = get_nfit_res(start);
+
+ if (nfit_res) {
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
+
+ if (dev) {
+ devres_release(dev, nfit_devres_release, match,
+ &start);
+ return true;
+ }
+
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (req->res.start == start) {
+ res = &req->res;
+ list_del(&req->list);
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ WARN(!res || resource_size(res) != n,
+ "%s: start: %llx n: %llx mismatch: %pr\n",
+ __func__, start, n, res);
+ if (res)
+ kfree(req);
+ return true;
+ }
+ }
+ return false;
+}
+
static struct resource *nfit_test_request_region(struct device *dev,
struct resource *parent, resource_size_t start,
resource_size_t n, const char *name, int flags)
@@ -183,21 +241,57 @@ static struct resource *nfit_test_request_region(struct device *dev,
if (parent == &iomem_resource) {
nfit_res = get_nfit_res(start);
if (nfit_res) {
- struct resource *res = nfit_res->res + 1;
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
- if (start + n > nfit_res->res->start
- + resource_size(nfit_res->res)) {
+ if (start + n > nfit_res->res.start
+ + resource_size(&nfit_res->res)) {
pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
__func__, start, n,
- nfit_res->res);
+ &nfit_res->res);
return NULL;
}
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (start == req->res.start) {
+ res = &req->res;
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ if (res) {
+ WARN(1, "%pr already busy\n", res);
+ return NULL;
+ }
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return NULL;
+ INIT_LIST_HEAD(&req->list);
+ res = &req->res;
+
res->start = start;
res->end = start + n - 1;
res->name = name;
res->flags = resource_type(parent);
res->flags |= IORESOURCE_BUSY | flags;
+ spin_lock(&nfit_res->lock);
+ list_add(&req->list, &nfit_res->requests);
+ spin_unlock(&nfit_res->lock);
+
+ if (dev) {
+ struct resource **d;
+
+ d = devres_alloc(nfit_devres_release,
+ sizeof(struct resource *),
+ GFP_KERNEL);
+ if (!d)
+ return NULL;
+ *d = res;
+ devres_add(dev, d);
+ }
+
pr_debug("%s: %pr\n", __func__, res);
return res;
}
@@ -241,29 +335,10 @@ struct resource *__wrap___devm_request_region(struct device *dev,
}
EXPORT_SYMBOL(__wrap___devm_request_region);
-static bool nfit_test_release_region(struct resource *parent,
- resource_size_t start, resource_size_t n)
-{
- if (parent == &iomem_resource) {
- struct nfit_test_resource *nfit_res = get_nfit_res(start);
- if (nfit_res) {
- struct resource *res = nfit_res->res + 1;
-
- if (start != res->start || resource_size(res) != n)
- pr_info("%s: start: %llx n: %llx mismatch: %pr\n",
- __func__, start, n, res);
- else
- memset(res, 0, sizeof(*res));
- return true;
- }
- }
- return false;
-}
-
void __wrap___release_region(struct resource *parent, resource_size_t start,
resource_size_t n)
{
- if (!nfit_test_release_region(parent, start, n))
+ if (!nfit_test_release_region(NULL, parent, start, n))
__release_region(parent, start, n);
}
EXPORT_SYMBOL(__wrap___release_region);
@@ -271,9 +346,25 @@ EXPORT_SYMBOL(__wrap___release_region);
void __wrap___devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n)
{
- if (!nfit_test_release_region(parent, start, n))
+ if (!nfit_test_release_region(dev, parent, start, n))
__devm_release_region(dev, parent, start, n);
}
EXPORT_SYMBOL(__wrap___devm_release_region);
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+ struct acpi_object_list *p, struct acpi_buffer *buf)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) handle);
+ union acpi_object **obj;
+
+ if (!nfit_res || strcmp(path, "_FIT") || !buf)
+ return acpi_evaluate_object(handle, path, p, buf);
+
+ obj = nfit_res->buf;
+ buf->length = sizeof(union acpi_object);
+ buf->pointer = *obj;
+ return AE_OK;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
+
MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index f64c57b..c9a6458 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -132,6 +132,8 @@ static u32 handle[NUM_DCR] = {
[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
};
+static unsigned long dimm_fail_cmd_flags[NUM_DCR];
+
struct nfit_test {
struct acpi_nfit_desc acpi_desc;
struct platform_device pdev;
@@ -154,11 +156,14 @@ struct nfit_test {
int (*alloc)(struct nfit_test *t);
void (*setup)(struct nfit_test *t);
int setup_hotplug;
+ union acpi_object **_fit;
+ dma_addr_t _fit_dma;
struct ars_state {
struct nd_cmd_ars_status *ars_status;
unsigned long deadline;
spinlock_t lock;
} ars_state;
+ struct device *dimm_dev[NUM_DCR];
};
static struct nfit_test *to_nfit_test(struct device *dev)
@@ -411,6 +416,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
if (i >= ARRAY_SIZE(handle))
return -ENXIO;
+ if ((1 << func) & dimm_fail_cmd_flags[i])
+ return -EIO;
+
switch (func) {
case ND_CMD_GET_CONFIG_SIZE:
rc = nfit_test_cmd_get_config_size(buf, buf_len);
@@ -428,6 +436,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
break;
case ND_CMD_SMART_THRESHOLD:
rc = nfit_test_cmd_smart_threshold(buf, buf_len);
+ device_lock(&t->pdev.dev);
+ __acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
+ device_unlock(&t->pdev.dev);
break;
default:
return -ENOTTY;
@@ -467,14 +478,12 @@ static struct nfit_test *instances[NUM_NFITS];
static void release_nfit_res(void *data)
{
struct nfit_test_resource *nfit_res = data;
- struct resource *res = nfit_res->res;
spin_lock(&nfit_test_lock);
list_del(&nfit_res->list);
spin_unlock(&nfit_test_lock);
vfree(nfit_res->buf);
- kfree(res);
kfree(nfit_res);
}
@@ -482,12 +491,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
void *buf)
{
struct device *dev = &t->pdev.dev;
- struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
GFP_KERNEL);
int rc;
- if (!res || !buf || !nfit_res)
+ if (!buf || !nfit_res)
goto err;
rc = devm_add_action(dev, release_nfit_res, nfit_res);
if (rc)
@@ -496,10 +504,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
memset(buf, 0, size);
nfit_res->dev = dev;
nfit_res->buf = buf;
- nfit_res->res = res;
- res->start = *dma;
- res->end = *dma + size - 1;
- res->name = "NFIT";
+ nfit_res->res.start = *dma;
+ nfit_res->res.end = *dma + size - 1;
+ nfit_res->res.name = "NFIT";
+ spin_lock_init(&nfit_res->lock);
+ INIT_LIST_HEAD(&nfit_res->requests);
spin_lock(&nfit_test_lock);
list_add(&nfit_res->list, &t->resources);
spin_unlock(&nfit_test_lock);
@@ -508,7 +517,6 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
err:
if (buf)
vfree(buf);
- kfree(res);
kfree(nfit_res);
return NULL;
}
@@ -533,13 +541,13 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
continue;
spin_lock(&nfit_test_lock);
list_for_each_entry(n, &t->resources, list) {
- if (addr >= n->res->start && (addr < n->res->start
- + resource_size(n->res))) {
+ if (addr >= n->res.start && (addr < n->res.start
+ + resource_size(&n->res))) {
nfit_res = n;
break;
} else if (addr >= (unsigned long) n->buf
&& (addr < (unsigned long) n->buf
- + resource_size(n->res))) {
+ + resource_size(&n->res))) {
nfit_res = n;
break;
}
@@ -564,6 +572,86 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state)
return 0;
}
+static void put_dimms(void *data)
+{
+ struct device **dimm_dev = data;
+ int i;
+
+ for (i = 0; i < NUM_DCR; i++)
+ if (dimm_dev[i])
+ device_unregister(dimm_dev[i]);
+}
+
+static struct class *nfit_test_dimm;
+
+static int dimm_name_to_id(struct device *dev)
+{
+ int dimm;
+
+ if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1
+ || dimm >= NUM_DCR || dimm < 0)
+ return -ENXIO;
+ return dimm;
+}
+
+
+static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#x", handle[dimm]);
+}
+DEVICE_ATTR_RO(handle);
+
+static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[dimm]);
+}
+
+static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ unsigned long val;
+ ssize_t rc;
+
+ if (dimm < 0)
+ return dimm;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ dimm_fail_cmd_flags[dimm] = val;
+ return size;
+}
+static DEVICE_ATTR_RW(fail_cmd);
+
+static struct attribute *nfit_test_dimm_attributes[] = {
+ &dev_attr_fail_cmd.attr,
+ &dev_attr_handle.attr,
+ NULL,
+};
+
+static struct attribute_group nfit_test_dimm_attribute_group = {
+ .attrs = nfit_test_dimm_attributes,
+};
+
+static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
+ &nfit_test_dimm_attribute_group,
+ NULL,
+};
+
static int nfit_test0_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -616,6 +704,21 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
}
+ t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma);
+ if (!t->_fit)
+ return -ENOMEM;
+
+ if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev))
+ return -ENOMEM;
+ for (i = 0; i < NUM_DCR; i++) {
+ t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+ &t->pdev.dev, 0, NULL,
+ nfit_test_dimm_attribute_groups,
+ "test_dimm%d", i);
+ if (!t->dimm_dev[i])
+ return -ENOMEM;
+ }
+
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -1409,6 +1512,8 @@ static int nfit_test_probe(struct platform_device *pdev)
struct acpi_nfit_desc *acpi_desc;
struct device *dev = &pdev->dev;
struct nfit_test *nfit_test;
+ struct nfit_mem *nfit_mem;
+ union acpi_object *obj;
int rc;
nfit_test = to_nfit_test(&pdev->dev);
@@ -1476,14 +1581,30 @@ static int nfit_test_probe(struct platform_device *pdev)
if (nfit_test->setup != nfit_test0_setup)
return 0;
- flush_work(&acpi_desc->work);
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
- rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
- nfit_test->nfit_size);
- if (rc)
- return rc;
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+ obj->type = ACPI_TYPE_BUFFER;
+ obj->buffer.length = nfit_test->nfit_size;
+ obj->buffer.pointer = nfit_test->nfit_buf;
+ *(nfit_test->_fit) = obj;
+ __acpi_nfit_notify(&pdev->dev, nfit_test, 0x80);
+
+ /* associate dimm devices with nfit_mem data for notification testing */
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+ int i;
+
+ for (i = 0; i < NUM_DCR; i++)
+ if (nfit_handle == handle[i])
+ dev_set_drvdata(nfit_test->dimm_dev[i],
+ nfit_mem);
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
return 0;
}
@@ -1518,6 +1639,10 @@ static __init int nfit_test_init(void)
{
int rc, i;
+ nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
+ if (IS_ERR(nfit_test_dimm))
+ return PTR_ERR(nfit_test_dimm);
+
nfit_test_setup(nfit_test_lookup);
for (i = 0; i < NUM_NFITS; i++) {
@@ -1584,6 +1709,7 @@ static __exit void nfit_test_exit(void)
for (i = 0; i < NUM_NFITS; i++)
platform_device_unregister(&instances[i]->pdev);
nfit_test_teardown();
+ class_destroy(nfit_test_dimm);
}
module_init(nfit_test_init);
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 9f18e2a..c281dd2 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -13,11 +13,21 @@
#ifndef __NFIT_TEST_H__
#define __NFIT_TEST_H__
#include <linux/list.h>
+#include <linux/ioport.h>
+#include <linux/spinlock_types.h>
+
+struct nfit_test_request {
+ struct list_head list;
+ struct resource res;
+};
struct nfit_test_resource {
+ struct list_head requests;
struct list_head list;
- struct resource *res;
+ struct resource res;
struct device *dev;
+ spinlock_t lock;
+ int req_count;
void *buf;
};
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 9d0919ed..f2e07f2 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -3,7 +3,8 @@ CFLAGS += -I. -g -O2 -Wall -D_LGPL_SOURCE
LDFLAGS += -lpthread -lurcu
TARGETS = main
OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \
- regression1.o regression2.o regression3.o multiorder.o
+ regression1.o regression2.o regression3.o multiorder.o \
+ iteration_check.o
targets: $(TARGETS)
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
new file mode 100644
index 0000000..9adb8e7
--- /dev/null
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -0,0 +1,180 @@
+/*
+ * iteration_check.c: test races having to do with radix tree iteration
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <pthread.h>
+#include "test.h"
+
+#define NUM_THREADS 4
+#define TAG 0
+static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_t threads[NUM_THREADS];
+RADIX_TREE(tree, GFP_KERNEL);
+bool test_complete;
+
+/* relentlessly fill the tree with tagged entries */
+static void *add_entries_fn(void *arg)
+{
+ int pgoff;
+
+ while (!test_complete) {
+ for (pgoff = 0; pgoff < 100; pgoff++) {
+ pthread_mutex_lock(&tree_lock);
+ if (item_insert(&tree, pgoff) == 0)
+ item_tag_set(&tree, pgoff, TAG);
+ pthread_mutex_unlock(&tree_lock);
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
+ * things that have been removed and randomly resetting our iteration to the
+ * next chunk with radix_tree_iter_next(). Both radix_tree_iter_retry() and
+ * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *tagged_iteration_fn(void *arg)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+
+ while (!test_complete) {
+ rcu_read_lock();
+ radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) {
+ void *entry;
+ int i;
+
+ /* busy wait to let removals happen */
+ for (i = 0; i < 1000000; i++)
+ ;
+
+ entry = radix_tree_deref_slot(slot);
+ if (unlikely(!entry))
+ continue;
+
+ if (radix_tree_deref_retry(entry)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (rand() % 50 == 0)
+ slot = radix_tree_iter_next(&iter);
+ }
+ rcu_read_unlock();
+ }
+
+ return NULL;
+}
+
+/*
+ * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
+ * that have been removed and randomly resetting our iteration to the next
+ * chunk with radix_tree_iter_next(). Both radix_tree_iter_retry() and
+ * radix_tree_iter_next() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *untagged_iteration_fn(void *arg)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+
+ while (!test_complete) {
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ void *entry;
+ int i;
+
+ /* busy wait to let removals happen */
+ for (i = 0; i < 1000000; i++)
+ ;
+
+ entry = radix_tree_deref_slot(slot);
+ if (unlikely(!entry))
+ continue;
+
+ if (radix_tree_deref_retry(entry)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (rand() % 50 == 0)
+ slot = radix_tree_iter_next(&iter);
+ }
+ rcu_read_unlock();
+ }
+
+ return NULL;
+}
+
+/*
+ * Randomly remove entries to help induce radix_tree_iter_retry() calls in the
+ * two iteration functions.
+ */
+static void *remove_entries_fn(void *arg)
+{
+ while (!test_complete) {
+ int pgoff;
+
+ pgoff = rand() % 100;
+
+ pthread_mutex_lock(&tree_lock);
+ item_delete(&tree, pgoff);
+ pthread_mutex_unlock(&tree_lock);
+ }
+
+ return NULL;
+}
+
+/* This is a unit test for a bug found by the syzkaller tester */
+void iteration_test(void)
+{
+ int i;
+
+ printf("Running iteration tests for 10 seconds\n");
+
+ srand(time(0));
+ test_complete = false;
+
+ if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) {
+ perror("pthread_create");
+ exit(1);
+ }
+ if (pthread_create(&threads[1], NULL, untagged_iteration_fn, NULL)) {
+ perror("pthread_create");
+ exit(1);
+ }
+ if (pthread_create(&threads[2], NULL, add_entries_fn, NULL)) {
+ perror("pthread_create");
+ exit(1);
+ }
+ if (pthread_create(&threads[3], NULL, remove_entries_fn, NULL)) {
+ perror("pthread_create");
+ exit(1);
+ }
+
+ sleep(10);
+ test_complete = true;
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ if (pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+
+ item_kill_tree(&tree);
+}
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index b7619ff..daa9010 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -332,6 +332,7 @@ int main(int argc, char **argv)
regression1_test();
regression2_test();
regression3_test();
+ iteration_test();
single_thread_tests(long_run);
sleep(1);
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index 2d03a63..0d6813a 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -43,7 +43,7 @@
#include "regression.h"
static RADIX_TREE(mt_tree, GFP_KERNEL);
-static pthread_mutex_t mt_lock;
+static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
struct page {
pthread_mutex_t lock;
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index e851313..217fb24 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -27,6 +27,7 @@ void item_kill_tree(struct radix_tree_root *root);
void tag_check(void);
void multiorder_checks(void);
+void iteration_test(void);
struct item *
item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 4f747ee..a89f80a 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,7 +5,8 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
- check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test
+ check_initial_reg_state sigreturn ldt_gdt iopl \
+ protection_keys
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
new file mode 100644
index 0000000..b202939
--- /dev/null
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -0,0 +1,219 @@
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#define NR_PKEYS 16
+#define PKRU_BITS_PER_PKEY 2
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+static inline void sigsafe_printf(const char *format, ...)
+{
+ va_list ap;
+
+ va_start(ap, format);
+ if (!dprint_in_signal) {
+ vprintf(format, ap);
+ } else {
+ int len = vsnprintf(dprint_in_signal_buffer,
+ DPRINT_IN_SIGNAL_BUF_SIZE,
+ format, ap);
+ /*
+ * len is amount that would have been printed,
+ * but actual write is truncated at BUF_SIZE.
+ */
+ if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
+ len = DPRINT_IN_SIGNAL_BUF_SIZE;
+ write(1, dprint_in_signal_buffer, len);
+ }
+ va_end(ap);
+}
+#define dprintf_level(level, args...) do { \
+ if (level <= DEBUG_LEVEL) \
+ sigsafe_printf(args); \
+ fflush(NULL); \
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern unsigned int shadow_pkru;
+static inline unsigned int __rdpkru(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned int pkru;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkru = eax;
+ return pkru;
+}
+
+static inline unsigned int _rdpkru(int line)
+{
+ unsigned int pkru = __rdpkru();
+
+ dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
+ line, pkru, shadow_pkru);
+ assert(pkru == shadow_pkru);
+
+ return pkru;
+}
+
+#define rdpkru() _rdpkru(__LINE__)
+
+static inline void __wrpkru(unsigned int pkru)
+{
+ unsigned int eax = pkru;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkru == __rdpkru());
+}
+
+static inline void wrpkru(unsigned int pkru)
+{
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ /* will do the shadow check for us: */
+ rdpkru();
+ __wrpkru(pkru);
+ shadow_pkru = pkru;
+ dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+}
+
+/*
+ * These are technically racy. since something could
+ * change PKRU between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+ unsigned int pkru = rdpkru();
+ int bit = pkey * 2;
+
+ if (do_allow)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ dprintf4("pkru now: %08x\n", rdpkru());
+ wrpkru(pkru);
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+ long pkru = rdpkru();
+ int bit = pkey * 2 + 1;
+
+ if (do_allow_write)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ wrpkru(pkru);
+ dprintf4("pkru now: %08x\n", rdpkru());
+}
+
+#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
+#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
+#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
+#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
+
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+#define XSTATE_PKRU_BIT (9)
+#define XSTATE_PKRU 0x200
+
+int pkru_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKRU is set in XCR0 */
+ leaf = XSTATE_PKRU_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKRU_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKRU in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
new file mode 100644
index 0000000..bdd58c7
--- /dev/null
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -0,0 +1,1410 @@
+/*
+ * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ *
+ * There are examples in here of:
+ * * how to set protection keys on memory
+ * * how to set/clear bits in PKRU (the rights register)
+ * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * information from the siginfo
+ *
+ * Things to add:
+ * make sure KSM and KSM COW breaking works
+ * prefault pages in at malloc, or not
+ * protect MPX bounds tables with protection keys?
+ * make sure VMA splitting/merging is working correctly
+ * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
+ * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
+ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ *
+ * Compile like this:
+ * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/futex.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <setjmp.h>
+
+#include "pkey-helpers.h"
+
+int iteration_nr = 1;
+int test_nr;
+
+unsigned int shadow_pkru;
+
+#define HPAGE_SIZE (1UL<<21)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+int dprint_in_signal;
+char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ assert(condition); \
+ } \
+} while (0)
+#define raw_assert(cond) assert(cond)
+
+void cat_into_file(char *str, char *file)
+{
+ int fd = open(file, O_RDWR);
+ int ret;
+
+ dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
+ /*
+ * these need to be raw because they are called under
+ * pkey_assert()
+ */
+ raw_assert(fd >= 0);
+ ret = write(fd, str, strlen(str));
+ if (ret != strlen(str)) {
+ perror("write to file failed");
+ fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
+ raw_assert(0);
+ }
+ close(fd);
+}
+
+#if CONTROL_TRACING > 0
+static int warned_tracing;
+int tracing_root_ok(void)
+{
+ if (geteuid() != 0) {
+ if (!warned_tracing)
+ fprintf(stderr, "WARNING: not run as root, "
+ "can not do tracing control\n");
+ warned_tracing = 1;
+ return 0;
+ }
+ return 1;
+}
+#endif
+
+void tracing_on(void)
+{
+#if CONTROL_TRACING > 0
+#define TRACEDIR "/sys/kernel/debug/tracing"
+ char pidstr[32];
+
+ if (!tracing_root_ok())
+ return;
+
+ sprintf(pidstr, "%d", getpid());
+ cat_into_file("0", TRACEDIR "/tracing_on");
+ cat_into_file("\n", TRACEDIR "/trace");
+ if (1) {
+ cat_into_file("function_graph", TRACEDIR "/current_tracer");
+ cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
+ } else {
+ cat_into_file("nop", TRACEDIR "/current_tracer");
+ }
+ cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
+ cat_into_file("1", TRACEDIR "/tracing_on");
+ dprintf1("enabled tracing\n");
+#endif
+}
+
+void tracing_off(void)
+{
+#if CONTROL_TRACING > 0
+ if (!tracing_root_ok())
+ return;
+ cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+#endif
+}
+
+void abort_hooks(void)
+{
+ fprintf(stderr, "running %s()...\n", __func__);
+ tracing_off();
+#ifdef SLEEP_ON_ABORT
+ sleep(SLEEP_ON_ABORT);
+#endif
+}
+
+static inline void __page_o_noops(void)
+{
+ /* 8-bytes of instruction * 512 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+/*
+ * This attempts to have roughly a page of instructions followed by a few
+ * instructions that do a write, and another page of instructions. That
+ * way, we are pretty sure that the write is in the second page of
+ * instructions and has at least a page of padding behind it.
+ *
+ * *That* lets us be sure to madvise() away the write instruction, which
+ * will then fault, which makes sure that the fault code handles
+ * execute-only memory properly.
+ */
+__attribute__((__aligned__(PAGE_SIZE)))
+void lots_o_noops_around_write(int *write_to_me)
+{
+ dprintf3("running %s()\n", __func__);
+ __page_o_noops();
+ /* Assume this happens in the second page of instructions: */
+ *write_to_me = __LINE__;
+ /* pad out by another page: */
+ __page_o_noops();
+ dprintf3("%s() done\n", __func__);
+}
+
+/* Define some kernel-like types */
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#ifdef __i386__
+#define SYS_mprotect_key 380
+#define SYS_pkey_alloc 381
+#define SYS_pkey_free 382
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x18
+#else
+#define SYS_mprotect_key 329
+#define SYS_pkey_alloc 330
+#define SYS_pkey_free 331
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+#endif
+
+void dump_mem(void *dumpme, int len_bytes)
+{
+ char *c = (void *)dumpme;
+ int i;
+
+ for (i = 0; i < len_bytes; i += sizeof(u64)) {
+ u64 *ptr = (u64 *)(c + i);
+ dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+ }
+}
+
+#define __SI_FAULT (3 << 16)
+#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */
+#define SEGV_PKUERR (__SI_FAULT|4)
+
+static char *si_code_str(int si_code)
+{
+ if (si_code & SEGV_MAPERR)
+ return "SEGV_MAPERR";
+ if (si_code & SEGV_ACCERR)
+ return "SEGV_ACCERR";
+ if (si_code & SEGV_BNDERR)
+ return "SEGV_BNDERR";
+ if (si_code & SEGV_PKUERR)
+ return "SEGV_PKUERR";
+ return "UNKNOWN";
+}
+
+int pkru_faults;
+int last_si_pkey = -1;
+void signal_handler(int signum, siginfo_t *si, void *vucontext)
+{
+ ucontext_t *uctxt = vucontext;
+ int trapno;
+ unsigned long ip;
+ char *fpregs;
+ u32 *pkru_ptr;
+ u64 si_pkey;
+ u32 *si_pkey_ptr;
+ int pkru_offset;
+ fpregset_t fpregset;
+
+ dprint_in_signal = 1;
+ dprintf1(">>>>===============SIGSEGV============================\n");
+ dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
+ __rdpkru(), shadow_pkru);
+
+ trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+ ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ fpregset = uctxt->uc_mcontext.fpregs;
+ fpregs = (void *)fpregset;
+
+ dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
+ trapno, ip, si_code_str(si->si_code), si->si_code);
+#ifdef __i386__
+ /*
+ * 32-bit has some extra padding so that userspace can tell whether
+ * the XSTATE header is present in addition to the "legacy" FPU
+ * state. We just assume that it is here.
+ */
+ fpregs += 0x70;
+#endif
+ pkru_offset = pkru_xstate_offset();
+ pkru_ptr = (void *)(&fpregs[pkru_offset]);
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
+ /*
+ * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * here.
+ */
+ dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ if (DEBUG_LEVEL > 4)
+ dump_mem(pkru_ptr - 128, 256);
+ pkey_assert(*pkru_ptr);
+
+ si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+ dump_mem(si_pkey_ptr - 8, 24);
+ si_pkey = *si_pkey_ptr;
+ pkey_assert(si_pkey < NR_PKEYS);
+ last_si_pkey = si_pkey;
+
+ if ((si->si_code == SEGV_MAPERR) ||
+ (si->si_code == SEGV_ACCERR) ||
+ (si->si_code == SEGV_BNDERR)) {
+ printf("non-PK si_code, exiting...\n");
+ exit(4);
+ }
+
+ dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
+ /* need __rdpkru() version so we do not do shadow_pkru checking */
+ dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
+ dprintf1("si_pkey from siginfo: %jx\n", si_pkey);
+ *(u64 *)pkru_ptr = 0x00000000;
+ dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
+ pkru_faults++;
+ dprintf1("<<<<==================================================\n");
+ return;
+ if (trapno == 14) {
+ fprintf(stderr,
+ "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
+ trapno, ip);
+ fprintf(stderr, "si_addr %p\n", si->si_addr);
+ fprintf(stderr, "REG_ERR: %lx\n",
+ (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+ exit(1);
+ } else {
+ fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
+ fprintf(stderr, "si_addr %p\n", si->si_addr);
+ fprintf(stderr, "REG_ERR: %lx\n",
+ (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+ exit(2);
+ }
+ dprint_in_signal = 0;
+}
+
+int wait_all_children(void)
+{
+ int status;
+ return waitpid(-1, &status, 0);
+}
+
+void sig_chld(int x)
+{
+ dprint_in_signal = 1;
+ dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
+ dprint_in_signal = 0;
+}
+
+void setup_sigsegv_handler(void)
+{
+ int r, rs;
+ struct sigaction newact;
+ struct sigaction oldact;
+
+ /* #PF is mapped to sigsegv */
+ int signum = SIGSEGV;
+
+ newact.sa_handler = 0;
+ newact.sa_sigaction = signal_handler;
+
+ /*sigset_t - signals to block while in the handler */
+ /* get the old signal mask. */
+ rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+ pkey_assert(rs == 0);
+
+ /* call sa_sigaction, not sa_handler*/
+ newact.sa_flags = SA_SIGINFO;
+
+ newact.sa_restorer = 0; /* void(*)(), obsolete */
+ r = sigaction(signum, &newact, &oldact);
+ r = sigaction(SIGALRM, &newact, &oldact);
+ pkey_assert(r == 0);
+}
+
+void setup_handlers(void)
+{
+ signal(SIGCHLD, &sig_chld);
+ setup_sigsegv_handler();
+}
+
+pid_t fork_lazy_child(void)
+{
+ pid_t forkret;
+
+ forkret = fork();
+ pkey_assert(forkret >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+ if (!forkret) {
+ /* in the child */
+ while (1) {
+ dprintf1("child sleeping...\n");
+ sleep(30);
+ }
+ }
+ return forkret;
+}
+
+void davecmp(void *_a, void *_b, int len)
+{
+ int i;
+ unsigned long *a = _a;
+ unsigned long *b = _b;
+
+ for (i = 0; i < len / sizeof(*a); i++) {
+ if (a[i] == b[i])
+ continue;
+
+ dprintf3("[%3d]: a: %016lx b: %016lx\n", i, a[i], b[i]);
+ }
+}
+
+void dumpit(char *f)
+{
+ int fd = open(f, O_RDONLY);
+ char buf[100];
+ int nr_read;
+
+ dprintf2("maps fd: %d\n", fd);
+ do {
+ nr_read = read(fd, &buf[0], sizeof(buf));
+ write(1, buf, nr_read);
+ } while (nr_read > 0);
+ close(fd);
+}
+
+#define PKEY_DISABLE_ACCESS 0x1
+#define PKEY_DISABLE_WRITE 0x2
+
+u32 pkey_get(int pkey, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 pkru = __rdpkru();
+ u32 shifted_pkru;
+ u32 masked_pkru;
+
+ dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+ __func__, pkey, flags, 0, 0);
+ dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+
+ shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
+ dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
+ masked_pkru = shifted_pkru & mask;
+ dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
+ /*
+ * shift down the relevant bits to the lowest two, then
+ * mask off all the other high bits.
+ */
+ return masked_pkru;
+}
+
+int pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 old_pkru = __rdpkru();
+ u32 new_pkru;
+
+ /* make sure that 'rights' only contains the bits we expect: */
+ assert(!(rights & ~mask));
+
+ /* copy old pkru */
+ new_pkru = old_pkru;
+ /* mask out bits from pkey in old value: */
+ new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+ /* OR in new bits for pkey: */
+ new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+
+ __wrpkru(new_pkru);
+
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
+ __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+ return 0;
+}
+
+void pkey_disable_set(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights;
+ u32 orig_pkru;
+
+ dprintf1("START->%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ pkey_rights = pkey_get(pkey, syscall_flags);
+
+ dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = pkey_set(pkey, pkey_rights, syscall_flags);
+ assert(!ret);
+ /*pkru and flags have the same format */
+ shadow_pkru |= flags << (pkey * 2);
+ dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+
+ pkey_assert(ret >= 0);
+
+ pkey_rights = pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ pkey_assert(rdpkru() > orig_pkru);
+ dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights = pkey_get(pkey, syscall_flags);
+ u32 orig_pkru = rdpkru();
+
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = pkey_set(pkey, pkey_rights, 0);
+ /* pkru and flags have the same format */
+ shadow_pkru &= ~(flags << (pkey * 2));
+ pkey_assert(ret >= 0);
+
+ pkey_rights = pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ assert(rdpkru() > orig_pkru);
+}
+
+void pkey_write_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+ ptr, size, orig_prot, pkey);
+
+ errno = 0;
+ sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+ if (errno) {
+ dprintf2("SYS_mprotect_key sret: %d\n", sret);
+ dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+ dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+ if (DEBUG_LEVEL >= 2)
+ perror("SYS_mprotect_pkey");
+ }
+ return sret;
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(SYS_pkey_alloc, flags, init_val);
+ dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+ __func__, flags, init_val, ret, errno);
+ return ret;
+}
+
+int alloc_pkey(void)
+{
+ int ret;
+ unsigned long init_val = 0x0;
+
+ dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
+ __LINE__, __rdpkru(), shadow_pkru);
+ ret = sys_pkey_alloc(0, init_val);
+ /*
+ * pkey_alloc() sets PKRU, so we need to reflect it in
+ * shadow_pkru:
+ */
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ if (ret) {
+ /* clear both the bits: */
+ shadow_pkru &= ~(0x3 << (ret * 2));
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ /*
+ * move the new state in from init_val
+ * (remember, we cheated and init_val == pkru format)
+ */
+ shadow_pkru |= (init_val << (ret * 2));
+ }
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ /* for shadow checking: */
+ rdpkru();
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+ int ret = syscall(SYS_pkey_free, pkey);
+ dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+ return ret;
+}
+
+/*
+ * I had a bug where pkey bits could be set by mprotect() but
+ * not cleared. This ensures we get lots of random bit sets
+ * and clears on the vma and pte pkey bits.
+ */
+int alloc_random_pkey(void)
+{
+ int max_nr_pkey_allocs;
+ int ret;
+ int i;
+ int alloced_pkeys[NR_PKEYS];
+ int nr_alloced = 0;
+ int random_index;
+ memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+
+ /* allocate every possible key and make a note of which ones we got */
+ max_nr_pkey_allocs = NR_PKEYS;
+ max_nr_pkey_allocs = 1;
+ for (i = 0; i < max_nr_pkey_allocs; i++) {
+ int new_pkey = alloc_pkey();
+ if (new_pkey < 0)
+ break;
+ alloced_pkeys[nr_alloced++] = new_pkey;
+ }
+
+ pkey_assert(nr_alloced > 0);
+ /* select a random one out of the allocated ones */
+ random_index = rand() % nr_alloced;
+ ret = alloced_pkeys[random_index];
+ /* now zero it out so we don't free it next */
+ alloced_pkeys[random_index] = 0;
+
+ /* go through the allocated ones that we did not want and free them */
+ for (i = 0; i < nr_alloced; i++) {
+ int free_ret;
+ if (!alloced_pkeys[i])
+ continue;
+ free_ret = sys_pkey_free(alloced_pkeys[i]);
+ pkey_assert(!free_ret);
+ }
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int nr_iterations = random() % 100;
+ int ret;
+
+ while (0) {
+ int rpkey = alloc_random_pkey();
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ if (nr_iterations-- < 0)
+ break;
+
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ sys_pkey_free(rpkey);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ }
+ pkey_assert(pkey < NR_PKEYS);
+
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ pkey_assert(!ret);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+struct pkey_malloc_record {
+ void *ptr;
+ long size;
+};
+struct pkey_malloc_record *pkey_malloc_records;
+long nr_pkey_malloc_records;
+void record_pkey_malloc(void *ptr, long size)
+{
+ long i;
+ struct pkey_malloc_record *rec = NULL;
+
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ rec = &pkey_malloc_records[i];
+ /* find a free record */
+ if (rec)
+ break;
+ }
+ if (!rec) {
+ /* every record is full */
+ size_t old_nr_records = nr_pkey_malloc_records;
+ size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
+ size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
+ dprintf2("new_nr_records: %zd\n", new_nr_records);
+ dprintf2("new_size: %zd\n", new_size);
+ pkey_malloc_records = realloc(pkey_malloc_records, new_size);
+ pkey_assert(pkey_malloc_records != NULL);
+ rec = &pkey_malloc_records[nr_pkey_malloc_records];
+ /*
+ * realloc() does not initialize memory, so zero it from
+ * the first new record all the way to the end.
+ */
+ for (i = 0; i < new_nr_records - old_nr_records; i++)
+ memset(rec + i, 0, sizeof(*rec));
+ }
+ dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
+ (int)(rec - pkey_malloc_records), rec, ptr, size);
+ rec->ptr = ptr;
+ rec->size = size;
+ nr_pkey_malloc_records++;
+}
+
+void free_pkey_malloc(void *ptr)
+{
+ long i;
+ int ret;
+ dprintf3("%s(%p)\n", __func__, ptr);
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ struct pkey_malloc_record *rec = &pkey_malloc_records[i];
+ dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ if ((ptr < rec->ptr) ||
+ (ptr >= rec->ptr + rec->size))
+ continue;
+
+ dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ nr_pkey_malloc_records--;
+ ret = munmap(rec->ptr, rec->size);
+ dprintf3("munmap ret: %d\n", ret);
+ pkey_assert(!ret);
+ dprintf3("clearing rec->ptr, rec: %p\n", rec);
+ rec->ptr = NULL;
+ dprintf3("done clearing rec->ptr, rec: %p\n", rec);
+ return;
+ }
+ pkey_assert(false);
+}
+
+
+void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int ret;
+
+ rdpkru();
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+ pkey_assert(!ret);
+ record_pkey_malloc(ptr, size);
+ rdpkru();
+
+ dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+{
+ int ret;
+ void *ptr;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ /*
+ * Guarantee we can fit at least one huge page in the resulting
+ * allocation by allocating space for 2:
+ */
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ record_pkey_malloc(ptr, size);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ dprintf1("unaligned ptr: %p\n", ptr);
+ ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
+ dprintf1(" aligned ptr: %p\n", ptr);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
+ dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
+ dprintf1("MADV_WILLNEED ret: %d\n", ret);
+ memset(ptr, 0, HPAGE_SIZE);
+
+ dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+int hugetlb_setup_ok;
+#define GET_NR_HUGE_PAGES 10
+void setup_hugetlbfs(void)
+{
+ int err;
+ int fd;
+ int validated_nr_pages;
+ int i;
+ char buf[] = "123";
+
+ if (geteuid() != 0) {
+ fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+ return;
+ }
+
+ cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
+
+ /*
+ * Now go make sure that we got the pages and that they
+ * are 2M pages. Someone might have made 1G the default.
+ */
+ fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ if (fd < 0) {
+ perror("opening sysfs 2M hugetlb config");
+ return;
+ }
+
+ /* -1 to guarantee leaving the trailing \0 */
+ err = read(fd, buf, sizeof(buf)-1);
+ close(fd);
+ if (err <= 0) {
+ perror("reading sysfs 2M hugetlb config");
+ return;
+ }
+
+ if (atoi(buf) != GET_NR_HUGE_PAGES) {
+ fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
+ buf, GET_NR_HUGE_PAGES);
+ return;
+ }
+
+ hugetlb_setup_ok = 1;
+}
+
+void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
+
+ if (!hugetlb_setup_ok)
+ return PTR_ERR_ENOTSUP;
+
+ dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size);
+
+ dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int fd;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ fd = open("/dax/foo", O_RDWR);
+ pkey_assert(fd >= 0);
+
+ ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
+ pkey_assert(ptr != (void *)-1);
+
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size);
+
+ dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
+ close(fd);
+ return ptr;
+}
+
+void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+
+ malloc_pkey_with_mprotect,
+ malloc_pkey_anon_huge,
+ malloc_pkey_hugetlb
+/* can not do direct with the pkey_mprotect() API:
+ malloc_pkey_mmap_direct,
+ malloc_pkey_mmap_dax,
+*/
+};
+
+void *malloc_pkey(long size, int prot, u16 pkey)
+{
+ void *ret;
+ static int malloc_type;
+ int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
+
+ pkey_assert(pkey < NR_PKEYS);
+
+ while (1) {
+ pkey_assert(malloc_type < nr_malloc_types);
+
+ ret = pkey_malloc[malloc_type](size, prot, pkey);
+ pkey_assert(ret != (void *)-1);
+
+ malloc_type++;
+ if (malloc_type >= nr_malloc_types)
+ malloc_type = (random()%nr_malloc_types);
+
+ /* try again if the malloc_type we tried is unsupported */
+ if (ret == PTR_ERR_ENOTSUP)
+ continue;
+
+ break;
+ }
+
+ dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
+ size, prot, pkey, ret);
+ return ret;
+}
+
+int last_pkru_faults;
+void expected_pk_fault(int pkey)
+{
+ dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
+ __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
+ pkey_assert(last_pkru_faults + 1 == pkru_faults);
+ pkey_assert(last_si_pkey == pkey);
+ /*
+ * The signal handler shold have cleared out PKRU to let the
+ * test program continue. We now have to restore it.
+ */
+ if (__rdpkru() != 0)
+ pkey_assert(0);
+
+ __wrpkru(shadow_pkru);
+ dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
+ __func__, shadow_pkru);
+ last_pkru_faults = pkru_faults;
+ last_si_pkey = -1;
+}
+
+void do_not_expect_pk_fault(void)
+{
+ pkey_assert(last_pkru_faults == pkru_faults);
+}
+
+int test_fds[10] = { -1 };
+int nr_test_fds;
+void __save_test_fd(int fd)
+{
+ pkey_assert(fd >= 0);
+ pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
+ test_fds[nr_test_fds] = fd;
+ nr_test_fds++;
+}
+
+int get_test_read_fd(void)
+{
+ int test_fd = open("/etc/passwd", O_RDONLY);
+ __save_test_fd(test_fd);
+ return test_fd;
+}
+
+void close_test_fds(void)
+{
+ int i;
+
+ for (i = 0; i < nr_test_fds; i++) {
+ if (test_fds[i] < 0)
+ continue;
+ close(test_fds[i]);
+ test_fds[i] = -1;
+ }
+ nr_test_fds = 0;
+}
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+__attribute__((noinline)) int read_ptr(int *ptr)
+{
+ /*
+ * Keep GCC from optimizing this away somehow
+ */
+ barrier();
+ return *ptr;
+}
+
+void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling write access to PKEY[1], doing read\n");
+ pkey_write_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ dprintf1("\n");
+}
+void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
+ rdpkru();
+ pkey_access_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ expected_pk_fault(pkey);
+}
+void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
+ pkey_write_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
+ pkey_access_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel read() to buffer\n", pkey);
+ pkey_access_deny(pkey);
+ ret = read(test_fd, ptr, 1);
+ dprintf1("read ret: %d\n", ret);
+ pkey_assert(ret);
+}
+void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ pkey_write_deny(pkey);
+ ret = read(test_fd, ptr, 100);
+ dprintf1("read ret: %d\n", ret);
+ if (ret < 0 && (DEBUG_LEVEL > 0))
+ perror("verbose read result (OK for this to be bad)");
+ pkey_assert(ret);
+}
+
+void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int pipe_ret, vmsplice_ret;
+ struct iovec iov;
+ int pipe_fds[2];
+
+ pipe_ret = pipe(pipe_fds);
+
+ pkey_assert(pipe_ret == 0);
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel vmsplice from buffer\n", pkey);
+ pkey_access_deny(pkey);
+ iov.iov_base = ptr;
+ iov.iov_len = PAGE_SIZE;
+ vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
+ dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
+ pkey_assert(vmsplice_ret == -1);
+
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+}
+
+void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ignored = 0xdada;
+ int futex_ret;
+ int some_int = __LINE__;
+
+ dprintf1("disabling write to PKEY[%02d], "
+ "doing futex gunk in buffer\n", pkey);
+ *ptr = some_int;
+ pkey_write_deny(pkey);
+ futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
+ &ignored, ignored);
+ if (DEBUG_LEVEL > 0)
+ perror("futex");
+ dprintf1("futex() ret: %d\n", futex_ret);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+{
+ int err;
+ int i;
+
+ /* Note: 0 is the default pkey, so don't mess with it */
+ for (i = 1; i < NR_PKEYS; i++) {
+ if (pkey == i)
+ continue;
+
+ dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ /* not enforced when pkey_get() is not a syscall
+ err = pkey_get(i, 0);
+ pkey_assert(err < 0);
+ */
+
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
+ pkey_assert(err);
+ }
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+{
+ int err;
+ int bad_flag = (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) + 1;
+ int bad_pkey = NR_PKEYS+99;
+
+ /* not enforced when pkey_get() is not a syscall
+ err = pkey_get(bad_pkey, bad_flag);
+ pkey_assert(err < 0);
+ */
+
+ /* pass a known-invalid pkey in: */
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
+ pkey_assert(err);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+{
+ unsigned long flags;
+ unsigned long init_val;
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS*2; i++) {
+ int new_pkey;
+ dprintf1("%s() alloc loop: %d\n", __func__, i);
+ new_pkey = alloc_pkey();
+ dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, err, __rdpkru(), shadow_pkru);
+ rdpkru(); /* for shadow checking */
+ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
+ if ((new_pkey == -1) && (errno == ENOSPC)) {
+ dprintf2("%s() failed to allocate pkey after %d tries\n",
+ __func__, nr_allocated_pkeys);
+ break;
+ }
+ pkey_assert(nr_allocated_pkeys < NR_PKEYS);
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ /*
+ * ensure it did not reach the end of the loop without
+ * failure:
+ */
+ pkey_assert(i < NR_PKEYS*2);
+
+ /*
+ * There are 16 pkeys supported in hardware. One is taken
+ * up for the default (0) and another can be taken up by
+ * an execute-only mapping. Ensure that we can allocate
+ * at least 14 (16-2).
+ */
+ pkey_assert(i >= NR_PKEYS-2);
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ rdpkru(); /* for shadow checking */
+ }
+}
+
+void test_ptrace_of_child(int *ptr, u16 pkey)
+{
+ __attribute__((__unused__)) int peek_result;
+ pid_t child_pid;
+ void *ignored = 0;
+ long ret;
+ int status;
+ /*
+ * This is the "control" for our little expermient. Make sure
+ * we can always access it when ptracing.
+ */
+ int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
+ int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
+
+ /*
+ * Fork a child which is an exact copy of this process, of course.
+ * That means we can do all of our tests via ptrace() and then plain
+ * memory access and ensure they work differently.
+ */
+ child_pid = fork_lazy_child();
+ dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
+
+ ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
+ if (ret)
+ perror("attach");
+ dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
+ pkey_assert(ret != -1);
+ ret = waitpid(child_pid, &status, WUNTRACED);
+ if ((ret != child_pid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitpid result %ld stat %x\n",
+ ret, status);
+ pkey_assert(0);
+ }
+ dprintf2("waitpid ret: %ld\n", ret);
+ dprintf2("waitpid status: %d\n", status);
+
+ pkey_access_deny(pkey);
+ pkey_write_deny(pkey);
+
+ /* Write access, untested for now:
+ ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
+ pkey_assert(ret != -1);
+ dprintf1("poke at %p: %ld\n", peek_at, ret);
+ */
+
+ /*
+ * Try to access the pkey-protected "ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect an exception: */
+ peek_result = read_ptr(ptr);
+ expected_pk_fault(pkey);
+
+ /*
+ * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect NO exception: */
+ peek_result = read_ptr(plain_ptr);
+ do_not_expect_pk_fault();
+
+ ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
+ pkey_assert(ret != -1);
+
+ ret = kill(child_pid, SIGKILL);
+ pkey_assert(ret != -1);
+
+ wait(&status);
+
+ free(plain_ptr_unaligned);
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
+ dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
+ /* lots_o_noops_around_write should be page-aligned already */
+ assert(p1 == &lots_o_noops_around_write);
+
+ /* Point 'p1' at the *second* page of the function: */
+ p1 += PAGE_SIZE;
+
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
+ pkey_assert(!ret);
+ pkey_access_deny(pkey);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /*
+ * Make sure this is an *instruction* fault
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault();
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(pkey);
+}
+
+void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+{
+ int size = PAGE_SIZE;
+ int sret;
+
+ if (cpu_has_pku()) {
+ dprintf1("SKIP: %s: no CPU support\n", __func__);
+ return;
+ }
+
+ sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ pkey_assert(sret < 0);
+}
+
+void (*pkey_tests[])(int *ptr, u16 pkey) = {
+ test_read_of_write_disabled_region,
+ test_read_of_access_disabled_region,
+ test_write_of_write_disabled_region,
+ test_write_of_access_disabled_region,
+ test_kernel_write_of_access_disabled_region,
+ test_kernel_write_of_write_disabled_region,
+ test_kernel_gup_of_access_disabled_region,
+ test_kernel_gup_write_to_write_disabled_region,
+ test_executing_on_unreadable_memory,
+ test_ptrace_of_child,
+ test_pkey_syscalls_on_non_allocated_pkey,
+ test_pkey_syscalls_bad_args,
+ test_pkey_alloc_exhaust,
+};
+
+void run_tests_once(void)
+{
+ int *ptr;
+ int prot = PROT_READ|PROT_WRITE;
+
+ for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
+ int pkey;
+ int orig_pkru_faults = pkru_faults;
+
+ dprintf1("======================\n");
+ dprintf1("test %d preparing...\n", test_nr);
+
+ tracing_on();
+ pkey = alloc_random_pkey();
+ dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
+ ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
+ dprintf1("test %d starting...\n", test_nr);
+ pkey_tests[test_nr](ptr, pkey);
+ dprintf1("freeing test memory: %p\n", ptr);
+ free_pkey_malloc(ptr);
+ sys_pkey_free(pkey);
+
+ dprintf1("pkru_faults: %d\n", pkru_faults);
+ dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+
+ tracing_off();
+ close_test_fds();
+
+ printf("test %2d PASSED (itertation %d)\n", test_nr, iteration_nr);
+ dprintf1("======================\n\n");
+ }
+ iteration_nr++;
+}
+
+void pkey_setup_shadow(void)
+{
+ shadow_pkru = __rdpkru();
+}
+
+int main(void)
+{
+ int nr_iterations = 22;
+
+ setup_handlers();
+
+ printf("has pku: %d\n", cpu_has_pku());
+
+ if (!cpu_has_pku()) {
+ int size = PAGE_SIZE;
+ int *ptr;
+
+ printf("running PKEY tests for unsupported CPU/OS\n");
+
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ assert(ptr != (void *)-1);
+ test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
+ exit(0);
+ }
+
+ pkey_setup_shadow();
+ printf("startup pkru: %x\n", rdpkru());
+ setup_hugetlbfs();
+
+ while (nr_iterations-- > 0)
+ run_tests_once();
+
+ printf("done (all tests OK)\n");
+ return 0;
+}