summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS2
-rw-r--r--tools/lib/api/Makefile52
-rw-r--r--tools/lib/api/fs/tracing_path.c16
-rw-r--r--tools/lib/api/fs/tracing_path.h1
-rw-r--r--tools/lib/bpf/Makefile1
-rw-r--r--tools/lib/perf/Makefile12
-rw-r--r--tools/lib/perf/include/perf/cpumap.h3
-rw-r--r--tools/lib/subcmd/Makefile49
-rw-r--r--tools/lib/symbol/Build1
-rw-r--r--tools/lib/symbol/Makefile115
-rw-r--r--tools/lib/traceevent/Makefile4
-rw-r--r--tools/perf/.gitignore7
-rw-r--r--tools/perf/Documentation/perf-annotate.txt2
-rw-r--r--tools/perf/Documentation/perf-diff.txt2
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt12
-rw-r--r--tools/perf/Documentation/perf-list.txt10
-rw-r--r--tools/perf/Documentation/perf-lock.txt2
-rw-r--r--tools/perf/Documentation/perf-probe.txt2
-rw-r--r--tools/perf/Documentation/perf-record.txt2
-rw-r--r--tools/perf/Documentation/perf-report.txt2
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/MANIFEST3
-rw-r--r--tools/perf/Makefile.config17
-rw-r--r--tools/perf/Makefile.perf188
-rw-r--r--tools/perf/arch/arm/util/unwind-libdw.c2
-rw-r--r--tools/perf/arch/arm64/util/machine.c1
-rw-r--r--tools/perf/arch/arm64/util/pmu.c1
-rw-r--r--tools/perf/arch/arm64/util/unwind-libdw.c2
-rw-r--r--tools/perf/arch/powerpc/util/event.c1
-rw-r--r--tools/perf/arch/powerpc/util/unwind-libdw.c2
-rw-r--r--tools/perf/arch/riscv/util/Build1
-rw-r--r--tools/perf/arch/riscv/util/header.c104
-rw-r--r--tools/perf/arch/s390/util/unwind-libdw.c1
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build2
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c15
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-test.c (renamed from tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c)154
-rw-r--r--tools/perf/arch/x86/tests/sample-parsing.c1
-rw-r--r--tools/perf/arch/x86/util/event.c2
-rw-r--r--tools/perf/arch/x86/util/iostat.c4
-rw-r--r--tools/perf/arch/x86/util/tsc.c1
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c2
-rw-r--r--tools/perf/bench/inject-buildid.c2
-rw-r--r--tools/perf/bench/numa.c9
-rw-r--r--tools/perf/builtin-annotate.c2
-rw-r--r--tools/perf/builtin-daemon.c4
-rw-r--r--tools/perf/builtin-diff.c2
-rw-r--r--tools/perf/builtin-ftrace.c4
-rw-r--r--tools/perf/builtin-kwork.c6
-rw-r--r--tools/perf/builtin-list.c565
-rw-r--r--tools/perf/builtin-lock.c100
-rw-r--r--tools/perf/builtin-mem.c1
-rw-r--r--tools/perf/builtin-probe.c7
-rw-r--r--tools/perf/builtin-record.c10
-rw-r--r--tools/perf/builtin-report.c2
-rw-r--r--tools/perf/builtin-script.c4
-rw-r--r--tools/perf/builtin-stat.c211
-rw-r--r--tools/perf/builtin-top.c4
-rw-r--r--tools/perf/builtin-trace.c144
-rw-r--r--tools/perf/examples/bpf/5sec.c8
-rw-r--r--tools/perf/examples/bpf/augmented_raw_syscalls.c175
-rw-r--r--tools/perf/examples/bpf/augmented_syscalls.c169
-rw-r--r--tools/perf/examples/bpf/empty.c13
-rw-r--r--tools/perf/examples/bpf/etcsnoop.c76
-rw-r--r--tools/perf/examples/bpf/hello.c24
-rw-r--r--tools/perf/include/bpf/bpf.h70
-rw-r--r--tools/perf/include/bpf/linux/socket.h24
-rw-r--r--tools/perf/include/bpf/pid_filter.h21
-rw-r--r--tools/perf/include/bpf/stdio.h16
-rw-r--r--tools/perf/include/bpf/unistd.h10
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json (renamed from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json)0
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv3
-rw-r--r--tools/perf/pmu-events/arch/riscv/mapfile.csv17
-rw-r--r--tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json134
-rw-r--r--tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json68
-rw-r--r--tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json92
-rw-r--r--tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json32
-rw-r--r--tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json57
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json73
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/cache.json1149
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/floating-point.json91
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/frontend.json224
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/memory.json214
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/other.json132
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/pipeline.json1850
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json185
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json73
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json223
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json583
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/cache.json330
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json18
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/frontend.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/memory.json81
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/other.json38
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json533
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json175
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json33
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json47
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv3
-rw-r--r--tools/perf/scripts/python/intel-pt-events.py65
-rw-r--r--tools/perf/tests/Build2
-rw-r--r--tools/perf/tests/builtin-test.c29
-rw-r--r--tools/perf/tests/cpumap.c2
-rw-r--r--tools/perf/tests/dlfilter-test.c1
-rw-r--r--tools/perf/tests/expr.c1
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c1
-rw-r--r--tools/perf/tests/openat-syscall.c1
-rw-r--r--tools/perf/tests/parse-events.c13
-rw-r--r--tools/perf/tests/parse-metric.c2
-rw-r--r--tools/perf/tests/parse-no-sample-id-all.c1
-rw-r--r--tools/perf/tests/perf-record.c2
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c1
-rw-r--r--tools/perf/tests/pmu-events.c3
-rwxr-xr-xtools/perf/tests/shell/lock_contention.sh15
-rwxr-xr-xtools/perf/tests/shell/pipe_test.sh55
-rwxr-xr-xtools/perf/tests/shell/record.sh129
-rwxr-xr-xtools/perf/tests/shell/test_arm_callgraph_fp.sh34
-rwxr-xr-xtools/perf/tests/shell/test_arm_spe_fork.sh44
-rwxr-xr-xtools/perf/tests/shell/test_brstack.sh66
-rwxr-xr-xtools/perf/tests/shell/test_data_symbol.sh29
-rw-r--r--tools/perf/tests/sw-clock.c1
-rw-r--r--tools/perf/tests/switch-tracking.c1
-rw-r--r--tools/perf/tests/tests.h27
-rw-r--r--tools/perf/tests/thread-map.c1
-rw-r--r--tools/perf/tests/workloads/Build13
-rw-r--r--tools/perf/tests/workloads/brstack.c40
-rw-r--r--tools/perf/tests/workloads/datasym.c24
-rw-r--r--tools/perf/tests/workloads/leafloop.c34
-rw-r--r--tools/perf/tests/workloads/noploop.c32
-rw-r--r--tools/perf/tests/workloads/sqrtloop.c45
-rw-r--r--tools/perf/tests/workloads/thloop.c53
-rw-r--r--tools/perf/tests/wp.c12
-rw-r--r--tools/perf/trace/beauty/Build1
-rw-r--r--tools/perf/trace/beauty/beauty.h3
-rw-r--r--tools/perf/trace/beauty/perf_event_open.c44
-rw-r--r--tools/perf/trace/beauty/timespec.c21
-rw-r--r--tools/perf/ui/util.c5
-rw-r--r--tools/perf/util/Build7
-rw-r--r--tools/perf/util/amd-sample-raw.c1
-rw-r--r--tools/perf/util/arm64-frame-pointer-unwind-support.h6
-rw-r--r--tools/perf/util/auxtrace.c1
-rw-r--r--tools/perf/util/auxtrace.h2
-rw-r--r--tools/perf/util/bpf-loader.c4
-rw-r--r--tools/perf/util/bpf-prologue.h6
-rw-r--r--tools/perf/util/bpf_counter.c2
-rw-r--r--tools/perf/util/bpf_kwork.c3
-rw-r--r--tools/perf/util/bpf_lock_contention.c44
-rw-r--r--tools/perf/util/bpf_map.h3
-rw-r--r--tools/perf/util/bpf_skel/lock_contention.bpf.c34
-rw-r--r--tools/perf/util/branch.h6
-rw-r--r--tools/perf/util/config.c4
-rw-r--r--tools/perf/util/counts.c1
-rw-r--r--tools/perf/util/counts.h1
-rw-r--r--tools/perf/util/cpumap.c17
-rw-r--r--tools/perf/util/cpumap.h10
-rw-r--r--tools/perf/util/data-convert-bt.c1
-rw-r--r--tools/perf/util/data-convert-json.c20
-rw-r--r--tools/perf/util/dwarf-aux.c77
-rw-r--r--tools/perf/util/dwarf-aux.h3
-rw-r--r--tools/perf/util/event.h124
-rw-r--r--tools/perf/util/evlist.c2
-rw-r--r--tools/perf/util/evsel.c36
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/expr.c1
-rw-r--r--tools/perf/util/expr.h7
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c1
-rw-r--r--tools/perf/util/iostat.c3
-rw-r--r--tools/perf/util/iostat.h4
-rw-r--r--tools/perf/util/kwork.h12
-rw-r--r--tools/perf/util/llvm-utils.c8
-rw-r--r--tools/perf/util/lock-contention.h2
-rw-r--r--tools/perf/util/machine.h3
-rw-r--r--tools/perf/util/metricgroup.c243
-rw-r--r--tools/perf/util/metricgroup.h4
-rw-r--r--tools/perf/util/mmap.h5
-rw-r--r--tools/perf/util/parse-events.c1
-rw-r--r--tools/perf/util/perf_regs.c2
-rw-r--r--tools/perf/util/pfm.c154
-rw-r--r--tools/perf/util/pfm.h7
-rw-r--r--tools/perf/util/pmu.c292
-rw-r--r--tools/perf/util/pmu.h128
-rw-r--r--tools/perf/util/print-events.c639
-rw-r--r--tools/perf/util/print-events.h42
-rw-r--r--tools/perf/util/probe-finder.c37
-rw-r--r--tools/perf/util/python.c6
-rw-r--r--tools/perf/util/s390-cpumsf.c1
-rw-r--r--tools/perf/util/s390-sample-raw.c1
-rw-r--r--tools/perf/util/sample.h117
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c9
-rw-r--r--tools/perf/util/session.c3
-rw-r--r--tools/perf/util/setup.py3
-rw-r--r--tools/perf/util/stat-display.c1495
-rw-r--r--tools/perf/util/stat-shadow.c1
-rw-r--r--tools/perf/util/stat.c406
-rw-r--r--tools/perf/util/stat.h41
-rw-r--r--tools/perf/util/symbol-elf.c2
-rw-r--r--tools/perf/util/symbol.h2
-rw-r--r--tools/perf/util/synthetic-events.c3
-rw-r--r--tools/perf/util/thread.h3
-rw-r--r--tools/perf/util/thread_map.c1
-rw-r--r--tools/perf/util/thread_map.h2
-rw-r--r--tools/perf/util/trace-event-scripting.c2
211 files changed, 8225 insertions, 5715 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c962415d962c..7a3334f6fe9c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16201,7 +16201,7 @@ F: tools/lib/perf/
F: tools/perf/
PERFORMANCE EVENTS TOOLING ARM64
-R: John Garry <john.garry@huawei.com>
+R: John Garry <john.g.garry@oracle.com>
R: Will Deacon <will@kernel.org>
R: James Clark <james.clark@arm.com>
R: Mike Leach <mike.leach@linaro.org>
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index e21e1b40b525..3649c7f7ea65 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -15,6 +15,16 @@ LD ?= $(CROSS_COMPILE)ld
MAKEFLAGS += --no-print-directory
+INSTALL = install
+
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
LIBFILE = $(OUTPUT)libapi.a
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
@@ -45,10 +55,23 @@ RM = rm -f
API_IN := $(OUTPUT)libapi-in.o
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
all:
export srctree OUTPUT CC LD CFLAGS V
include $(srctree)/tools/build/Makefile.include
+include $(srctree)/tools/scripts/Makefile.include
all: fixdep $(LIBFILE)
@@ -58,6 +81,35 @@ $(API_IN): FORCE
$(LIBFILE): $(API_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN)
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: $(LIBFILE)
+ $(call QUIET_INSTALL, $(LIBFILE)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+ $(call QUIET_INSTALL, libapi_headers) \
+ $(call do_install,cpu.h,$(prefix)/include/api,644); \
+ $(call do_install,debug.h,$(prefix)/include/api,644); \
+ $(call do_install,io.h,$(prefix)/include/api,644); \
+ $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \
+ $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \
+ $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644);
+
+install: install_lib install_headers
+
clean:
$(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 5afb11b30fca..b8e457c841ab 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -113,6 +113,22 @@ DIR *tracing_events__opendir(void)
return dir;
}
+int tracing_events__scandir_alphasort(struct dirent ***namelist)
+{
+ char *path = get_tracing_file("events");
+ int ret;
+
+ if (!path) {
+ *namelist = NULL;
+ return 0;
+ }
+
+ ret = scandir(path, namelist, NULL, alphasort);
+ put_events_file(path);
+
+ return ret;
+}
+
int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
const char *sys, const char *name)
{
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
index a19136b086dc..fc6347c11deb 100644
--- a/tools/lib/api/fs/tracing_path.h
+++ b/tools/lib/api/fs/tracing_path.h
@@ -6,6 +6,7 @@
#include <dirent.h>
DIR *tracing_events__opendir(void);
+int tracing_events__scandir_alphasort(struct dirent ***namelist);
void tracing_path_set(const char *mountpoint);
const char *tracing_path_mount(void);
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4c904ef0b47e..7f5f7d2ebe1f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -255,6 +255,7 @@ $(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h
$(call do_install,$<,$(prefix)/include/bpf,644)
install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS)
+ $(call QUIET_INSTALL, libbpf_headers)
install_pkgconfig: $(PC_FILE)
$(call QUIET_INSTALL, $(PC_FILE)) \
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 21df023a2103..a90fb8c6bed4 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -188,14 +188,22 @@ install_lib: libs
cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
install_headers:
- $(call QUIET_INSTALL, headers) \
+ $(call QUIET_INSTALL, libperf_headers) \
+ $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
$(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
- $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);
+ $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \
+ $(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \
+ $(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \
+ $(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \
+ $(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \
+ $(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \
+ $(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \
+ $(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644);
install_pkgconfig: $(LIBPERF_PC)
$(call QUIET_INSTALL, $(LIBPERF_PC)) \
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 03aceb72a783..3f43f770cdac 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,7 +3,6 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
-#include <perf/cpumap.h>
#include <stdio.h>
#include <stdbool.h>
@@ -12,6 +11,8 @@ struct perf_cpu {
int cpu;
};
+struct perf_cpu_map;
+
LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index 8f1a09cdfd17..9a316d8b89df 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -17,6 +17,15 @@ RM = rm -f
MAKEFLAGS += --no-print-directory
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
LIBFILE = $(OUTPUT)libsubcmd.a
CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
@@ -48,6 +57,18 @@ CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
all:
export srctree OUTPUT CC LD CFLAGS V
@@ -61,6 +82,34 @@ $(SUBCMD_IN): FORCE
$(LIBFILE): $(SUBCMD_IN)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN)
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: $(LIBFILE)
+ $(call QUIET_INSTALL, $(LIBFILE)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+ $(call QUIET_INSTALL, libsubcmd_headers) \
+ $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \
+ $(call do_install,help.h,$(prefix)/include/subcmd,644); \
+ $(call do_install,pager.h,$(prefix)/include/subcmd,644); \
+ $(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \
+ $(call do_install,run-command.h,$(prefix)/include/subcmd,644);
+
+install: install_lib install_headers
+
clean:
$(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \
find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
diff --git a/tools/lib/symbol/Build b/tools/lib/symbol/Build
new file mode 100644
index 000000000000..9b9a9c78d3c9
--- /dev/null
+++ b/tools/lib/symbol/Build
@@ -0,0 +1 @@
+libsymbol-y += kallsyms.o
diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile
new file mode 100644
index 000000000000..ea8707b3442a
--- /dev/null
+++ b/tools/lib/symbol/Makefile
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak # QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC ?= $(CROSS_COMPILE)gcc
+AR ?= $(CROSS_COMPILE)ar
+LD ?= $(CROSS_COMPILE)ld
+
+MAKEFLAGS += --no-print-directory
+
+INSTALL = install
+
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LIBFILE = $(OUTPUT)libsymbol.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC
+
+ifeq ($(DEBUG),0)
+ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -O3
+else
+ CFLAGS += -O6
+endif
+endif
+
+ifeq ($(DEBUG),0)
+ CFLAGS += -D_FORTIFY_SOURCE
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+endif
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+
+CFLAGS += -I$(srctree)/tools/lib
+CFLAGS += -I$(srctree)/tools/include
+
+RM = rm -f
+
+SYMBOL_IN := $(OUTPUT)libsymbol-in.o
+
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+include $(srctree)/tools/scripts/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(SYMBOL_IN): FORCE
+ @$(MAKE) $(build)=libsymbol
+
+$(LIBFILE): $(SYMBOL_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN)
+
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: $(LIBFILE)
+ $(call QUIET_INSTALL, $(LIBFILE)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+ $(call QUIET_INSTALL, libsymbol_headers) \
+ $(call do_install,kallsyms.h,$(prefix)/include/symbol,644);
+
+install: install_lib install_headers
+
+clean:
+ $(call QUIET_CLEAN, libsymbol) $(RM) $(LIBFILE); \
+ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index c874c017c636..98dfd4badea3 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -234,11 +234,11 @@ install_pkgconfig:
$(call do_install_pkgconfig_file,$(prefix))
install_headers:
- $(call QUIET_INSTALL, headers) \
+ $(call QUIET_INSTALL, traceevent_headers) \
$(call do_install,event-parse.h,$(includedir_SQ),644); \
$(call do_install,event-utils.h,$(includedir_SQ),644); \
$(call do_install,trace-seq.h,$(includedir_SQ),644); \
- $(call do_install,kbuffer.h,$(includedir_SQ),644)
+ $(call do_install,kbuffer.h,$(includedir_SQ),644);
install: install_lib
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index fd7a6ff9e7aa..05806ecfc33c 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -39,7 +39,12 @@ trace/beauty/generated/
pmu-events/pmu-events.c
pmu-events/jevents
feature/
+libapi/
libbpf/
+libperf/
+libsubcmd/
+libsymbol/
+libtraceevent/
+libtraceevent_plugins/
fixdep
-libtraceevent-dynamic-list
Documentation/doc.dep
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 18fcc52809fb..980fe2c29275 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -41,7 +41,7 @@ OPTIONS
-q::
--quiet::
- Do not show any message. (Suppress -v)
+ Do not show any warnings or messages. (Suppress -v)
-n::
--show-nr-samples::
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index be65bd55ab2a..f3067a4af294 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -75,7 +75,7 @@ OPTIONS
-q::
--quiet::
- Do not show any message. (Suppress -v)
+ Do not show any warnings or messages. (Suppress -v)
-f::
--force::
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 92464a5d7eaf..7b6ccd2fa3bf 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -189,8 +189,16 @@ There is also script intel-pt-events.py which provides an example of how to
unpack the raw data for power events and PTWRITE. The script also displays
branches, and supports 2 additional modes selected by option:
- --insn-trace - instruction trace
- --src-trace - source trace
+ - --insn-trace - instruction trace
+ - --src-trace - source trace
+
+The intel-pt-events.py script also has options:
+
+ - --all-switch-events - display all switch events, not only the last consecutive.
+ - --interleave [<n>] - interleave sample output for the same timestamp so that
+ no more than n samples for a CPU are displayed in a row. 'n' defaults to 4.
+ Note this only affects the order of output, and only when the timestamp is the
+ same.
As mentioned above, it is easy to capture too much data. One way to limit the
data captured is to use 'snapshot' mode which is explained further below.
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 57384a97c04f..c5a3cb0f57c7 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -39,9 +39,13 @@ any extra expressions computed by perf stat.
--deprecated::
Print deprecated events. By default the deprecated events are hidden.
---cputype::
-Print events applying cpu with this type for hybrid platform
-(e.g. --cputype core or --cputype atom)
+--unit::
+Print PMU events and metrics limited to the specific PMU name.
+(e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom)
+
+-j::
+--json::
+Output in JSON format.
[[EVENT_MODIFIERS]]
EVENT MODIFIERS
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 3b1e16563b79..4958a1ffa1cc 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -42,7 +42,7 @@ COMMON OPTIONS
-q::
--quiet::
- Do not show any message. (Suppress -v)
+ Do not show any warnings or messages. (Suppress -v)
-D::
--dump-raw-trace::
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 080981d38d7b..7f8e8ba3a787 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -57,7 +57,7 @@ OPTIONS
-q::
--quiet::
- Be quiet (do not show any messages including errors).
+ Do not show any warnings or messages.
Can not use with -v.
-a::
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e41ae950fdc3..9ea6d44aca58 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -282,7 +282,7 @@ OPTIONS
-q::
--quiet::
- Don't print any message, useful for scripting.
+ Don't print any warnings or messages, useful for scripting.
-v::
--verbose::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 4533db2ee56b..4fa509b15948 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -27,7 +27,7 @@ OPTIONS
-q::
--quiet::
- Do not show any message. (Suppress -v)
+ Do not show any warnings or messages. (Suppress -v)
-n::
--show-nr-samples::
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index d7ff1867feda..18abdc1dce05 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -354,8 +354,8 @@ forbids the event merging logic from sharing events between groups and
may be used to increase accuracy in this case.
--quiet::
-Don't print output. This is useful with perf stat record below to only
-write data to the perf.data file.
+Don't print output, warnings or messages. This is useful with perf stat
+record below to only write data to the perf.data file.
STAT RECORD
-----------
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index f5d72f936a6b..c8e8e05b4ff1 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -13,8 +13,7 @@ tools/lib/ctype.c
tools/lib/hweight.c
tools/lib/rbtree.c
tools/lib/string.c
-tools/lib/symbol/kallsyms.c
-tools/lib/symbol/kallsyms.h
+tools/lib/symbol
tools/lib/find_bit.c
tools/lib/bitmap.c
tools/lib/list_sort.c
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 898226ea8cad..9cc3c48f3288 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -307,7 +307,7 @@ CORE_CFLAGS += -ggdb3
CORE_CFLAGS += -funwind-tables
CORE_CFLAGS += -Wall
CORE_CFLAGS += -Wextra
-CORE_CFLAGS += -std=gnu99
+CORE_CFLAGS += -std=gnu11
CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti
CXXFLAGS += -Wall
@@ -349,7 +349,6 @@ ifeq ($(DEBUG),0)
endif
endif
-INC_FLAGS += -I$(srctree)/tools/lib/perf/include
INC_FLAGS += -I$(src-perf)/util/include
INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
INC_FLAGS += -I$(srctree)/tools/include/
@@ -367,7 +366,6 @@ endif
INC_FLAGS += -I$(src-perf)/util
INC_FLAGS += -I$(src-perf)
-INC_FLAGS += -I$(srctree)/tools/lib/
CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
@@ -1216,7 +1214,7 @@ endif
# Among the variables below, these:
# perfexecdir
-# perf_include_dir
+# libbpf_include_dir
# perf_examples_dir
# template_dir
# mandir
@@ -1239,7 +1237,8 @@ includedir = $(abspath $(prefix)/$(includedir_relative))
mandir = share/man
infodir = share/info
perfexecdir = libexec/perf-core
-perf_include_dir = lib/perf/include
+# FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance
+libbpf_include_dir = /usr/include
perf_examples_dir = lib/perf/examples
sharedir = $(prefix)/share
template_dir = share/perf-core/templates
@@ -1272,7 +1271,7 @@ includedir_SQ = $(subst ','\'',$(includedir))
mandir_SQ = $(subst ','\'',$(mandir))
infodir_SQ = $(subst ','\'',$(infodir))
perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
-perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir))
+libbpf_include_dir_SQ = $(subst ','\'',$(libbpf_include_dir))
perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir))
template_dir_SQ = $(subst ','\'',$(template_dir))
htmldir_SQ = $(subst ','\'',$(htmldir))
@@ -1284,13 +1283,13 @@ srcdir_SQ = $(subst ','\'',$(srcdir))
ifneq ($(filter /%,$(firstword $(perfexecdir))),)
perfexec_instdir = $(perfexecdir)
-perf_include_instdir = $(perf_include_dir)
+perf_include_instdir = $(libbpf_include_dir)
perf_examples_instdir = $(perf_examples_dir)
STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
tip_instdir = $(tipdir)
else
perfexec_instdir = $(prefix)/$(perfexecdir)
-perf_include_instdir = $(prefix)/$(perf_include_dir)
+perf_include_instdir = $(prefix)/$(libbpf_include_dir)
perf_examples_instdir = $(prefix)/$(perf_examples_dir)
STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
tip_instdir = $(prefix)/$(tipdir)
@@ -1352,7 +1351,7 @@ $(call detected_var,ETC_PERFCONFIG_SQ)
$(call detected_var,STRACE_GROUPS_DIR_SQ)
$(call detected_var,prefix_SQ)
$(call detected_var,perfexecdir_SQ)
-$(call detected_var,perf_include_dir_SQ)
+$(call detected_var,libbpf_include_dir_SQ)
$(call detected_var,perf_examples_dir_SQ)
$(call detected_var,tipdir_SQ)
$(call detected_var,srcdir_SQ)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index a432e59afc42..a17a6ea85e81 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -241,10 +241,14 @@ sub-make: fixdep
else # force_fixdep
-LIB_DIR = $(srctree)/tools/lib/api/
-TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+LIBAPI_DIR = $(srctree)/tools/lib/api/
+ifndef LIBTRACEEVENT_DYNAMIC
+LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/
+LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins
+endif
LIBBPF_DIR = $(srctree)/tools/lib/bpf/
-SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
+LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/
+LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/
LIBPERF_DIR = $(srctree)/tools/lib/perf/
DOC_DIR = $(srctree)/tools/perf/Documentation/
@@ -291,37 +295,41 @@ SCRIPT_SH += perf-iostat.sh
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
+ifndef LIBTRACEEVENT_DYNAMIC
ifneq ($(OUTPUT),)
- TE_PATH=$(OUTPUT)
- PLUGINS_PATH=$(OUTPUT)
- SUBCMD_PATH=$(OUTPUT)
- LIBPERF_PATH=$(OUTPUT)
-ifneq ($(subdir),)
- API_PATH=$(OUTPUT)/../lib/api/
+ LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent
else
- API_PATH=$(OUTPUT)
+ LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent
endif
-else
- TE_PATH=$(TRACE_EVENT_DIR)
- PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
- API_PATH=$(LIB_DIR)
- SUBCMD_PATH=$(SUBCMD_DIR)
- LIBPERF_PATH=$(LIBPERF_DIR)
-endif
-
-LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins
+LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT)
+LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT)
+LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include
+LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a
export LIBTRACEEVENT
-LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list
-
+LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list
+CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include
#
# The static build has no dynsym table, so this does not work for
# static build. Looks like linker starts to scream about that now
# (in Fedora 26) so we need to switch it off for static build.
DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS))
+else
+LIBTRACEEVENT_DYNAMIC_LIST =
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS =
+endif
-LIBAPI = $(API_PATH)libapi.a
+ifneq ($(OUTPUT),)
+ LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi
+else
+ LIBAPI_OUTPUT = $(CURDIR)/libapi
+endif
+LIBAPI_DESTDIR = $(LIBAPI_OUTPUT)
+LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include
+LIBAPI = $(LIBAPI_OUTPUT)/libapi.a
export LIBAPI
+CFLAGS += -I$(LIBAPI_OUTPUT)/include
ifneq ($(OUTPUT),)
LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf
@@ -331,11 +339,38 @@ endif
LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
+CFLAGS += -I$(LIBBPF_OUTPUT)/include
+
+ifneq ($(OUTPUT),)
+ LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd
+else
+ LIBSUBCMD_OUTPUT = $(CURDIR)/libsubcmd
+endif
+LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT)
+LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include
+LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a
+CFLAGS += -I$(LIBSUBCMD_OUTPUT)/include
-LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
+ifneq ($(OUTPUT),)
+ LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol
+else
+ LIBSYMBOL_OUTPUT = $(CURDIR)/libsymbol
+endif
+LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT)
+LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include
+LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a
+CFLAGS += -I$(LIBSYMBOL_OUTPUT)/include
-LIBPERF = $(LIBPERF_PATH)libperf.a
+ifneq ($(OUTPUT),)
+ LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf
+else
+ LIBPERF_OUTPUT = $(CURDIR)/libperf
+endif
+LIBPERF_DESTDIR = $(LIBPERF_OUTPUT)
+LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include
+LIBPERF = $(LIBPERF_OUTPUT)/libperf.a
export LIBPERF
+CFLAGS += -I$(LIBPERF_OUTPUT)/include
# python extension build directories
PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
@@ -346,7 +381,11 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so
PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+ifndef LIBTRACEEVENT_DYNAMIC
PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
+else
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI)
+endif
SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
@@ -385,7 +424,7 @@ endif
export PERL_PATH
-PERFLIBS = $(LIBAPI) $(LIBSUBCMD) $(LIBPERF)
+PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL)
ifndef NO_LIBBPF
ifndef LIBBPF_DYNAMIC
PERFLIBS += $(LIBBPF)
@@ -668,14 +707,14 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
$(PERF_IN): prepare FORCE
$(Q)$(MAKE) $(build)=perf
-$(PMU_EVENTS_IN): FORCE
+$(PMU_EVENTS_IN): FORCE prepare
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
-$(GTK_IN): FORCE
+$(GTK_IN): FORCE prepare
$(Q)$(MAKE) $(build)=gtk
$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
@@ -751,8 +790,17 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(rename_flags_array) \
$(arch_errno_name_array) \
$(sync_file_range_arrays) \
+ $(LIBAPI) \
+ $(LIBBPF) \
+ $(LIBPERF) \
+ $(LIBSUBCMD) \
+ $(LIBSYMBOL) \
bpf-skel
+ifndef LIBTRACEEVENT_DYNAMIC
+prepare: $(LIBTRACEEVENT)
+endif
+
$(OUTPUT)%.o: %.c prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -808,30 +856,46 @@ endif
$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
+ifndef LIBTRACEEVENT_DYNAMIC
LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))'
-$(LIBTRACEEVENT): FORCE
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
-
-libtraceevent_plugins: FORCE
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
-
-$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
+$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT)
+ $(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \
+ DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \
+ $@ install_headers
$(LIBTRACEEVENT)-clean:
$(call QUIET_CLEAN, libtraceevent)
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+ $(Q)$(RM) -r -- $(LIBTRACEEVENT_OUTPUT)
+
+libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT)
+ $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
+ DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
+ plugins
+
+libtraceevent_plugins-clean:
+ $(call QUIET_CLEAN, libtraceevent_plugins)
+ $(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT)
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+ $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
+ DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
+ $(LIBTRACEEVENT_FLAGS) $@
install-traceevent-plugins: libtraceevent_plugins
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
+ $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
+ DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
+ $(LIBTRACEEVENT_FLAGS) install
+endif
-$(LIBAPI): FORCE
- $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
+$(LIBAPI): FORCE | $(LIBAPI_OUTPUT)
+ $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \
+ DESTDIR=$(LIBAPI_DESTDIR) prefix= \
+ $@ install_headers
$(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
- $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+ $(Q)$(RM) -r -- $(LIBAPI_OUTPUT)
$(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
$(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \
@@ -842,18 +906,32 @@ $(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
$(Q)$(RM) -r -- $(LIBBPF_OUTPUT)
-$(LIBPERF): FORCE
- $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a
+$(LIBPERF): FORCE | $(LIBPERF_OUTPUT)
+ $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \
+ DESTDIR=$(LIBPERF_DESTDIR) prefix= \
+ $@ install_headers
$(LIBPERF)-clean:
$(call QUIET_CLEAN, libperf)
- $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null
+ $(Q)$(RM) -r -- $(LIBPERF_OUTPUT)
-$(LIBSUBCMD): FORCE
- $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
+$(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT)
+ $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \
+ DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \
+ $@ install_headers
$(LIBSUBCMD)-clean:
- $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
+ $(call QUIET_CLEAN, libsubcmd)
+ $(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT)
+
+$(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT)
+ $(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \
+ DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \
+ $@ install_headers
+
+$(LIBSYMBOL)-clean:
+ $(call QUIET_CLEAN, libsymbol)
+ $(Q)$(RM) -r -- $(LIBSYMBOL_OUTPUT)
help:
@echo 'Perf make targets:'
@@ -960,11 +1038,6 @@ endif
$(call QUIET_INSTALL, libexec) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
ifndef NO_LIBBPF
- $(call QUIET_INSTALL, bpf-headers) \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \
- $(INSTALL) include/bpf/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
- $(INSTALL) include/bpf/linux/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
$(call QUIET_INSTALL, bpf-examples) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
$(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
@@ -1044,8 +1117,13 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
-$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
+$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
+ $(Q)$(MKDIR) -p $@
+
+ifndef LIBTRACEEVENT_DYNAMIC
+$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT):
$(Q)$(MKDIR) -p $@
+endif
ifdef BUILD_BPF_SKEL
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
@@ -1089,7 +1167,7 @@ endif # BUILD_BPF_SKEL
bpf-skel-clean:
$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean
+clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
@@ -1129,6 +1207,10 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(call QUIET_CLEAN, Documentation) \
$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null
+ifndef LIBTRACEEVENT_DYNAMIC
+clean:: $(LIBTRACEEVENT)-clean libtraceevent_plugins-clean
+endif
+
#
# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
# file if defined, with no further action.
diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
index b7692cb0c733..1834a0cd9ce3 100644
--- a/tools/perf/arch/arm/util/unwind-libdw.c
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -2,7 +2,7 @@
#include <elfutils/libdwfl.h>
#include "../../../util/unwind-libdw.h"
#include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "../../../util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index 41c1596e5207..235a0a1e1ec7 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -7,6 +7,7 @@
#include "symbol.h"
#include "callchain.h"
#include "record.h"
+#include "util/perf_regs.h"
void arch__add_leaf_frame_record_opts(struct record_opts *opts)
{
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index f849b1e88d43..477e513972a4 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+#include <internal/cpumap.h>
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"
diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c
index a50941629649..09385081bb03 100644
--- a/tools/perf/arch/arm64/util/unwind-libdw.c
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -2,7 +2,7 @@
#include <elfutils/libdwfl.h>
#include "../../../util/unwind-libdw.h"
#include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "../../../util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index cf430a4c55b9..77d8cc2b5691 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -9,6 +9,7 @@
#include "../../../util/tool.h"
#include "../../../util/map.h"
#include "../../../util/debug.h"
+#include "../../../util/sample.h"
void arch_perf_parse_sample_weight(struct perf_sample *data,
const __u64 *array, u64 type)
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
index 7b2d96ec28e3..e616642c754c 100644
--- a/tools/perf/arch/powerpc/util/unwind-libdw.c
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -3,7 +3,7 @@
#include <linux/kernel.h>
#include "../../../util/unwind-libdw.h"
#include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "../../../util/sample.h"
/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
static const int special_regs[3][2] = {
diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build
index 7d3050134ae0..603dbb5ae4dc 100644
--- a/tools/perf/arch/riscv/util/Build
+++ b/tools/perf/arch/riscv/util/Build
@@ -1,4 +1,5 @@
perf-y += perf_regs.o
+perf-y += header.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c
new file mode 100644
index 000000000000..4a41856938a8
--- /dev/null
+++ b/tools/perf/arch/riscv/util/header.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Implementation of get_cpuid().
+ *
+ * Author: Nikita Shubin <n.shubin@yadro.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include <errno.h>
+#include "../../util/debug.h"
+#include "../../util/header.h"
+
+#define CPUINFO_MVEN "mvendorid"
+#define CPUINFO_MARCH "marchid"
+#define CPUINFO_MIMP "mimpid"
+#define CPUINFO "/proc/cpuinfo"
+
+static char *_get_field(const char *line)
+{
+ char *line2, *nl;
+
+ line2 = strrchr(line, ' ');
+ if (!line2)
+ return NULL;
+
+ line2++;
+ nl = strrchr(line, '\n');
+ if (!nl)
+ return NULL;
+
+ return strndup(line2, nl - line2);
+}
+
+static char *_get_cpuid(void)
+{
+ char *line = NULL;
+ char *mvendorid = NULL;
+ char *marchid = NULL;
+ char *mimpid = NULL;
+ char *cpuid = NULL;
+ int read;
+ unsigned long line_sz;
+ FILE *cpuinfo;
+
+ cpuinfo = fopen(CPUINFO, "r");
+ if (cpuinfo == NULL)
+ return cpuid;
+
+ while ((read = getline(&line, &line_sz, cpuinfo)) != -1) {
+ if (!strncmp(line, CPUINFO_MVEN, strlen(CPUINFO_MVEN))) {
+ mvendorid = _get_field(line);
+ if (!mvendorid)
+ goto free;
+ } else if (!strncmp(line, CPUINFO_MARCH, strlen(CPUINFO_MARCH))) {
+ marchid = _get_field(line);
+ if (!marchid)
+ goto free;
+ } else if (!strncmp(line, CPUINFO_MIMP, strlen(CPUINFO_MIMP))) {
+ mimpid = _get_field(line);
+ if (!mimpid)
+ goto free;
+
+ break;
+ }
+ }
+
+ if (!mvendorid || !marchid || !mimpid)
+ goto free;
+
+ if (asprintf(&cpuid, "%s-%s-%s", mvendorid, marchid, mimpid) < 0)
+ cpuid = NULL;
+
+free:
+ fclose(cpuinfo);
+ free(mvendorid);
+ free(marchid);
+ free(mimpid);
+
+ return cpuid;
+}
+
+int get_cpuid(char *buffer, size_t sz)
+{
+ char *cpuid = _get_cpuid();
+ int ret = 0;
+
+ if (sz < strlen(cpuid)) {
+ ret = -EINVAL;
+ goto free;
+ }
+
+ scnprintf(buffer, sz, "%s", cpuid);
+free:
+ free(cpuid);
+ return ret;
+}
+
+char *
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ return _get_cpuid();
+}
diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c
index 387c698cdd1b..7d92452d5287 100644
--- a/tools/perf/arch/s390/util/unwind-libdw.c
+++ b/tools/perf/arch/s390/util/unwind-libdw.c
@@ -3,6 +3,7 @@
#include "../../util/unwind-libdw.h"
#include "../../util/perf_regs.h"
#include "../../util/event.h"
+#include "../../util/sample.h"
#include "dwarf-regs-table.h"
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 6a1a1b3c0827..902e9ea9b99e 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -8,6 +8,7 @@ struct test_suite;
int test__rdpmc(struct test_suite *test, int subtest);
int test__insn_x86(struct test_suite *test, int subtest);
int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
+int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest);
int test__bp_modify(struct test_suite *test, int subtest);
int test__x86_sample_parsing(struct test_suite *test, int subtest);
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 70b5bcbc15df..6f4e8636c3bf 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,5 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += sample-parsing.o
-perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
+perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 04018b8aa85b..aae6ea0fe52b 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -5,7 +5,18 @@
#ifdef HAVE_AUXTRACE_SUPPORT
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
-DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder);
+
+static struct test_case intel_pt_tests[] = {
+ TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder),
+ TEST_CASE("Intel PT hybrid CPU compatibility", intel_pt_hybrid_compat),
+ { .name = NULL, }
+};
+
+struct test_suite suite__intel_pt = {
+ .desc = "Intel PT",
+ .test_cases = intel_pt_tests,
+};
+
#endif
#if defined(__x86_64__)
DEFINE_SUITE("x86 bp modify", bp_modify);
@@ -18,7 +29,7 @@ struct test_suite *arch_tests[] = {
#endif
#ifdef HAVE_AUXTRACE_SUPPORT
&suite__insn_x86,
- &suite__intel_pt_pkt_decoder,
+ &suite__intel_pt,
#endif
#if defined(__x86_64__)
&suite__bp_modify,
diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c
index 42237656f453..70b7f79396b1 100644
--- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-test.c
@@ -1,12 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/bits.h>
#include <string.h>
+#include <cpuid.h>
+#include <sched.h>
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
#include "debug.h"
#include "tests/tests.h"
#include "arch-tests.h"
+#include "cpumap.h"
/**
* struct test_data - Test data.
@@ -313,3 +318,152 @@ int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subte
return TEST_OK;
}
+
+static int setaffinity(int cpu)
+{
+ cpu_set_t cpu_set;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(cpu, &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set)) {
+ pr_debug("sched_setaffinity() failed for CPU %d\n", cpu);
+ return -1;
+ }
+ return 0;
+}
+
+#define INTEL_PT_ADDR_FILT_CNT_MASK GENMASK(2, 0)
+#define INTEL_PT_SUBLEAF_CNT 2
+#define CPUID_REG_CNT 4
+
+struct cpuid_result {
+ union {
+ struct {
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ };
+ unsigned int reg[CPUID_REG_CNT];
+ };
+};
+
+struct pt_caps {
+ struct cpuid_result subleaf[INTEL_PT_SUBLEAF_CNT];
+};
+
+static int get_pt_caps(int cpu, struct pt_caps *caps)
+{
+ struct cpuid_result r;
+ int i;
+
+ if (setaffinity(cpu))
+ return -1;
+
+ memset(caps, 0, sizeof(*caps));
+
+ for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) {
+ __get_cpuid_count(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx);
+ pr_debug("CPU %d CPUID leaf 20 subleaf %d\n", cpu, i);
+ pr_debug("eax = 0x%08x\n", r.eax);
+ pr_debug("ebx = 0x%08x\n", r.ebx);
+ pr_debug("ecx = 0x%08x\n", r.ecx);
+ pr_debug("edx = 0x%08x\n", r.edx);
+ caps->subleaf[i] = r;
+ }
+
+ return 0;
+}
+
+static bool is_hydrid(void)
+{
+ unsigned int eax, ebx, ecx, edx = 0;
+ bool result;
+
+ __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
+ result = edx & BIT(15);
+ pr_debug("Is %shybrid : CPUID leaf 7 subleaf 0 edx %#x (bit-15 indicates hybrid)\n",
+ result ? "" : "not ", edx);
+ return result;
+}
+
+static int compare_caps(int cpu, struct pt_caps *caps, struct pt_caps *caps0)
+{
+ struct pt_caps mask = { /* Mask of bits to check*/
+ .subleaf = {
+ [0] = {
+ .ebx = GENMASK(8, 0),
+ .ecx = GENMASK(3, 0),
+ },
+ [1] = {
+ .eax = GENMASK(31, 16),
+ .ebx = GENMASK(31, 0),
+ }
+ }
+ };
+ unsigned int m, reg, reg0;
+ int ret = 0;
+ int i, j;
+
+ for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) {
+ for (j = 0; j < CPUID_REG_CNT; j++) {
+ m = mask.subleaf[i].reg[j];
+ reg = m & caps->subleaf[i].reg[j];
+ reg0 = m & caps0->subleaf[i].reg[j];
+ if ((reg & reg0) != reg0) {
+ pr_debug("CPU %d subleaf %d reg %d FAIL %#x vs %#x\n",
+ cpu, i, j, reg, reg0);
+ ret = -1;
+ }
+ }
+ }
+
+ m = INTEL_PT_ADDR_FILT_CNT_MASK;
+ reg = m & caps->subleaf[1].eax;
+ reg0 = m & caps0->subleaf[1].eax;
+ if (reg < reg0) {
+ pr_debug("CPU %d subleaf 1 reg 0 FAIL address filter count %#x vs %#x\n",
+ cpu, reg, reg0);
+ ret = -1;
+ }
+
+ if (!ret)
+ pr_debug("CPU %d OK\n", cpu);
+
+ return ret;
+}
+
+int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest)
+{
+ int max_cpu = cpu__max_cpu().cpu;
+ struct pt_caps last_caps;
+ struct pt_caps caps0;
+ int ret = TEST_OK;
+ int cpu;
+
+ if (!is_hydrid()) {
+ test->test_cases[subtest].skip_reason = "not hybrid";
+ return TEST_SKIP;
+ }
+
+ if (get_pt_caps(0, &caps0))
+ return TEST_FAIL;
+
+ for (cpu = 1, last_caps = caps0; cpu < max_cpu; cpu++) {
+ struct pt_caps caps;
+
+ if (get_pt_caps(cpu, &caps)) {
+ pr_debug("CPU %d not found\n", cpu);
+ continue;
+ }
+ if (!memcmp(&caps, &last_caps, sizeof(caps))) {
+ pr_debug("CPU %d same caps as previous CPU\n", cpu);
+ continue;
+ }
+ if (compare_caps(cpu, &caps, &caps0))
+ ret = TEST_FAIL;
+ last_caps = caps;
+ }
+
+ return ret;
+}
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
index bfbd3662b69e..690c7c07e90d 100644
--- a/tools/perf/arch/x86/tests/sample-parsing.c
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -10,6 +10,7 @@
#include "event.h"
#include "evsel.h"
#include "debug.h"
+#include "util/sample.h"
#include "util/synthetic-events.h"
#include "tests/tests.h"
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index e670f3547581..a3acefe6d0c6 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -2,6 +2,7 @@
#include <linux/types.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <stdlib.h>
#include "../../../util/event.h"
#include "../../../util/synthetic-events.h"
@@ -9,6 +10,7 @@
#include "../../../util/tool.h"
#include "../../../util/map.h"
#include "../../../util/debug.h"
+#include "util/sample.h"
#if defined(__x86_64__)
diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
index 404de795ec0b..7eb0a7b00b95 100644
--- a/tools/perf/arch/x86/util/iostat.c
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -449,7 +449,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
void iostat_print_counters(struct evlist *evlist,
struct perf_stat_config *config, struct timespec *ts,
- char *prefix, iostat_print_counter_t print_cnt_cb)
+ char *prefix, iostat_print_counter_t print_cnt_cb, void *arg)
{
void *perf_device = NULL;
struct evsel *counter = evlist__first(evlist);
@@ -464,7 +464,7 @@ void iostat_print_counters(struct evlist *evlist,
iostat_prefix(evlist, config, prefix, ts);
fprintf(config->output, "\n%s", prefix);
}
- print_cnt_cb(config, counter, prefix);
+ print_cnt_cb(config, counter, arg);
}
fputc('\n', config->output);
}
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index eb2b5195bd02..9b99f48b923c 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -2,6 +2,7 @@
#include <linux/types.h>
#include <math.h>
#include <string.h>
+#include <stdlib.h>
#include "../../../util/debug.h"
#include "../../../util/tsc.h"
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
index eea2bf87232b..ef71e8bf80bf 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -2,7 +2,7 @@
#include <elfutils/libdwfl.h>
#include "../../../util/unwind-libdw.h"
#include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "util/sample.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
index 17672790f123..4561bda0ce6a 100644
--- a/tools/perf/bench/inject-buildid.c
+++ b/tools/perf/bench/inject-buildid.c
@@ -19,10 +19,10 @@
#include "util/data.h"
#include "util/stat.h"
#include "util/debug.h"
-#include "util/event.h"
#include "util/symbol.h"
#include "util/session.h"
#include "util/build-id.h"
+#include "util/sample.h"
#include "util/synthetic-events.h"
#define MMAP_DEV_MAJOR 8
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index e78dedf9e682..9717c6c17433 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -16,6 +16,7 @@
#include <sched.h>
#include <stdio.h>
#include <assert.h>
+#include <debug.h>
#include <malloc.h>
#include <signal.h>
#include <stdlib.h>
@@ -116,7 +117,6 @@ struct params {
long bytes_thread;
int nr_tasks;
- bool show_quiet;
bool show_convergence;
bool measure_convergence;
@@ -197,7 +197,8 @@ static const struct option options[] = {
OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, "
"convergence is reached when each process (all its threads) is running on a single NUMA node."),
OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
- OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"),
+ OPT_BOOLEAN('q', "quiet" , &quiet,
+ "quiet mode (do not show any warnings or messages)"),
OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
/* Special option string parsing callbacks: */
@@ -1474,7 +1475,7 @@ static int init(void)
/* char array in count_process_nodes(): */
BUG_ON(g->p.nr_nodes < 0);
- if (g->p.show_quiet && !g->p.show_details)
+ if (quiet && !g->p.show_details)
g->p.show_details = -1;
/* Some memory should be specified: */
@@ -1553,7 +1554,7 @@ static void print_res(const char *name, double val,
if (!name)
name = "main,";
- if (!g->p.show_quiet)
+ if (!quiet)
printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
else
printf(" %14.3f %s\n", val, txt_long);
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f839e69492e8..517d928c00e3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -525,7 +525,7 @@ int cmd_annotate(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
- OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "do now show any warnings or messages"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
#ifdef HAVE_GTK2_SUPPORT
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 6cb3f6cc36d0..7036ec92d47d 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -100,12 +100,12 @@ static struct daemon __daemon = {
};
static const char * const daemon_usage[] = {
- "perf daemon start [<options>]",
+ "perf daemon {start|signal|stop|ping} [<options>]",
"perf daemon [<options>]",
NULL
};
-static bool done;
+static volatile sig_atomic_t done;
static void sig_handler(int sig __maybe_unused)
{
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index d925096dd7f0..ed07cc6cca56 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1260,7 +1260,7 @@ static const char * const diff_usage[] = {
static const struct option options[] = {
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
- OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
"Show only items with match in baseline"),
OPT_CALLBACK('c', "compute", &compute,
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 7de07bb16d23..d7fe00f66b83 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -36,8 +36,8 @@
#define DEFAULT_TRACER "function_graph"
-static volatile int workload_exec_errno;
-static bool done;
+static volatile sig_atomic_t workload_exec_errno;
+static volatile sig_atomic_t done;
static void sig_handler(int sig __maybe_unused)
{
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index fb8c63656ad8..4ffbf5908070 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -6,10 +6,15 @@
*/
#include "builtin.h"
+#include "perf.h"
#include "util/data.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/header.h"
#include "util/kwork.h"
#include "util/debug.h"
+#include "util/session.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/string2.h"
@@ -21,6 +26,7 @@
#include <errno.h>
#include <inttypes.h>
+#include <signal.h>
#include <linux/err.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 58e1ec1654ef..137d73edb541 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -15,34 +15,443 @@
#include "util/pmu-hybrid.h"
#include "util/debug.h"
#include "util/metricgroup.h"
+#include "util/string2.h"
+#include "util/strlist.h"
+#include "util/strbuf.h"
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
+#include <linux/zalloc.h>
+#include <stdarg.h>
#include <stdio.h>
-static bool desc_flag = true;
-static bool details_flag;
-static const char *hybrid_type;
+/**
+ * struct print_state - State and configuration passed to the default_print
+ * functions.
+ */
+struct print_state {
+ /**
+ * @pmu_glob: Optionally restrict PMU and metric matching to PMU or
+ * debugfs subsystem name.
+ */
+ char *pmu_glob;
+ /** @event_glob: Optional pattern matching glob. */
+ char *event_glob;
+ /** @name_only: Print event or metric names only. */
+ bool name_only;
+ /** @desc: Print the event or metric description. */
+ bool desc;
+ /** @long_desc: Print longer event or metric description. */
+ bool long_desc;
+ /** @deprecated: Print deprecated events or metrics. */
+ bool deprecated;
+ /**
+ * @detailed: Print extra information on the perf event such as names
+ * and expressions used internally by events.
+ */
+ bool detailed;
+ /** @metrics: Controls printing of metric and metric groups. */
+ bool metrics;
+ /** @metricgroups: Controls printing of metric and metric groups. */
+ bool metricgroups;
+ /** @last_topic: The last printed event topic. */
+ char *last_topic;
+ /** @last_metricgroups: The last printed metric group. */
+ char *last_metricgroups;
+ /** @visited_metrics: Metrics that are printed to avoid duplicates. */
+ struct strlist *visited_metrics;
+};
+
+static void default_print_start(void *ps)
+{
+ struct print_state *print_state = ps;
+
+ if (!print_state->name_only && pager_in_use())
+ printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
+}
+
+static void default_print_end(void *print_state __maybe_unused) {}
+
+static void wordwrap(const char *s, int start, int max, int corr)
+{
+ int column = start;
+ int n;
+ bool saw_newline = false;
+
+ while (*s) {
+ int wlen = strcspn(s, " \t\n");
+
+ if ((column + wlen >= max && column > start) || saw_newline) {
+ printf("\n%*s", start, "");
+ column = start + corr;
+ }
+ n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+ if (n <= 0)
+ break;
+ saw_newline = s[wlen] == '\n';
+ s += wlen;
+ column += n;
+ s = skip_spaces(s);
+ }
+}
+
+static void default_print_event(void *ps, const char *pmu_name, const char *topic,
+ const char *event_name, const char *event_alias,
+ const char *scale_unit __maybe_unused,
+ bool deprecated, const char *event_type_desc,
+ const char *desc, const char *long_desc,
+ const char *encoding_desc,
+ const char *metric_name, const char *metric_expr)
+{
+ struct print_state *print_state = ps;
+ int pos;
+
+ if (deprecated && !print_state->deprecated)
+ return;
+
+ if (print_state->pmu_glob && pmu_name && !strglobmatch(pmu_name, print_state->pmu_glob))
+ return;
+
+ if (print_state->event_glob &&
+ (!event_name || !strglobmatch(event_name, print_state->event_glob)) &&
+ (!event_alias || !strglobmatch(event_alias, print_state->event_glob)) &&
+ (!topic || !strglobmatch_nocase(topic, print_state->event_glob)))
+ return;
+
+ if (print_state->name_only) {
+ if (event_alias && strlen(event_alias))
+ printf("%s ", event_alias);
+ else
+ printf("%s ", event_name);
+ return;
+ }
+
+ if (strcmp(print_state->last_topic, topic ?: "")) {
+ if (topic)
+ printf("\n%s:\n", topic);
+ free(print_state->last_topic);
+ print_state->last_topic = strdup(topic ?: "");
+ }
+
+ if (event_alias && strlen(event_alias))
+ pos = printf(" %s OR %s", event_name, event_alias);
+ else
+ pos = printf(" %s", event_name);
+
+ if (!topic && event_type_desc) {
+ for (; pos < 53; pos++)
+ putchar(' ');
+ printf("[%s]\n", event_type_desc);
+ } else
+ putchar('\n');
+
+ if (desc && print_state->desc) {
+ printf("%*s", 8, "[");
+ wordwrap(desc, 8, pager_get_columns(), 0);
+ printf("]\n");
+ }
+ long_desc = long_desc ?: desc;
+ if (long_desc && print_state->long_desc) {
+ printf("%*s", 8, "[");
+ wordwrap(long_desc, 8, pager_get_columns(), 0);
+ printf("]\n");
+ }
+
+ if (print_state->detailed && encoding_desc) {
+ printf("%*s", 8, "");
+ wordwrap(encoding_desc, 8, pager_get_columns(), 0);
+ if (metric_name)
+ printf(" MetricName: %s", metric_name);
+ if (metric_expr)
+ printf(" MetricExpr: %s", metric_expr);
+ putchar('\n');
+ }
+}
+
+static void default_print_metric(void *ps,
+ const char *group,
+ const char *name,
+ const char *desc,
+ const char *long_desc,
+ const char *expr,
+ const char *unit __maybe_unused)
+{
+ struct print_state *print_state = ps;
+
+ if (print_state->event_glob &&
+ (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) &&
+ (!print_state->metricgroups || !group || !strglobmatch(group, print_state->event_glob)))
+ return;
+
+ if (!print_state->name_only && !print_state->last_metricgroups) {
+ if (print_state->metricgroups) {
+ printf("\nMetric Groups:\n");
+ if (!print_state->metrics)
+ putchar('\n');
+ } else {
+ printf("\nMetrics:\n\n");
+ }
+ }
+ if (!print_state->last_metricgroups ||
+ strcmp(print_state->last_metricgroups, group ?: "")) {
+ if (group && print_state->metricgroups) {
+ if (print_state->name_only)
+ printf("%s ", group);
+ else if (print_state->metrics)
+ printf("\n%s:\n", group);
+ else
+ printf("%s\n", group);
+ }
+ free(print_state->last_metricgroups);
+ print_state->last_metricgroups = strdup(group ?: "");
+ }
+ if (!print_state->metrics)
+ return;
+
+ if (print_state->name_only) {
+ if (print_state->metrics &&
+ !strlist__has_entry(print_state->visited_metrics, name)) {
+ printf("%s ", name);
+ strlist__add(print_state->visited_metrics, name);
+ }
+ return;
+ }
+ printf(" %s\n", name);
+
+ if (desc && print_state->desc) {
+ printf("%*s", 8, "[");
+ wordwrap(desc, 8, pager_get_columns(), 0);
+ printf("]\n");
+ }
+ if (long_desc && print_state->long_desc) {
+ printf("%*s", 8, "[");
+ wordwrap(long_desc, 8, pager_get_columns(), 0);
+ printf("]\n");
+ }
+ if (expr && print_state->detailed) {
+ printf("%*s", 8, "[");
+ wordwrap(expr, 8, pager_get_columns(), 0);
+ printf("]\n");
+ }
+}
+
+struct json_print_state {
+ /** Should a separator be printed prior to the next item? */
+ bool need_sep;
+};
+
+static void json_print_start(void *print_state __maybe_unused)
+{
+ printf("[\n");
+}
+
+static void json_print_end(void *ps)
+{
+ struct json_print_state *print_state = ps;
+
+ printf("%s]\n", print_state->need_sep ? "\n" : "");
+}
+
+static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ strbuf_setlen(buf, 0);
+ for (size_t fmt_pos = 0; fmt_pos < strlen(fmt); fmt_pos++) {
+ switch (fmt[fmt_pos]) {
+ case '%':
+ fmt_pos++;
+ switch (fmt[fmt_pos]) {
+ case 's': {
+ const char *s = va_arg(args, const char*);
+
+ strbuf_addstr(buf, s);
+ break;
+ }
+ case 'S': {
+ const char *s = va_arg(args, const char*);
+
+ for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) {
+ switch (s[s_pos]) {
+ case '\n':
+ strbuf_addstr(buf, "\\n");
+ break;
+ case '\\':
+ __fallthrough;
+ case '\"':
+ strbuf_addch(buf, '\\');
+ __fallthrough;
+ default:
+ strbuf_addch(buf, s[s_pos]);
+ break;
+ }
+ }
+ break;
+ }
+ default:
+ pr_err("Unexpected format character '%c'\n", fmt[fmt_pos]);
+ strbuf_addch(buf, '%');
+ strbuf_addch(buf, fmt[fmt_pos]);
+ }
+ break;
+ default:
+ strbuf_addch(buf, fmt[fmt_pos]);
+ break;
+ }
+ }
+ va_end(args);
+ fputs(buf->buf, stdout);
+}
+
+static void json_print_event(void *ps, const char *pmu_name, const char *topic,
+ const char *event_name, const char *event_alias,
+ const char *scale_unit,
+ bool deprecated, const char *event_type_desc,
+ const char *desc, const char *long_desc,
+ const char *encoding_desc,
+ const char *metric_name, const char *metric_expr)
+{
+ struct json_print_state *print_state = ps;
+ bool need_sep = false;
+ struct strbuf buf;
+
+ strbuf_init(&buf, 0);
+ printf("%s{\n", print_state->need_sep ? ",\n" : "");
+ print_state->need_sep = true;
+ if (pmu_name) {
+ fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name);
+ need_sep = true;
+ }
+ if (topic) {
+ fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic);
+ need_sep = true;
+ }
+ if (event_name) {
+ fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "",
+ event_name);
+ need_sep = true;
+ }
+ if (event_alias && strlen(event_alias)) {
+ fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "",
+ event_alias);
+ need_sep = true;
+ }
+ if (scale_unit && strlen(scale_unit)) {
+ fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "",
+ scale_unit);
+ need_sep = true;
+ }
+ if (event_type_desc) {
+ fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "",
+ event_type_desc);
+ need_sep = true;
+ }
+ if (deprecated) {
+ fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "",
+ deprecated ? "1" : "0");
+ need_sep = true;
+ }
+ if (desc) {
+ fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
+ desc);
+ need_sep = true;
+ }
+ if (long_desc) {
+ fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
+ long_desc);
+ need_sep = true;
+ }
+ if (encoding_desc) {
+ fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "",
+ encoding_desc);
+ need_sep = true;
+ }
+ if (metric_name) {
+ fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "",
+ metric_name);
+ need_sep = true;
+ }
+ if (metric_expr) {
+ fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "",
+ metric_expr);
+ need_sep = true;
+ }
+ printf("%s}", need_sep ? "\n" : "");
+ strbuf_release(&buf);
+}
+
+static void json_print_metric(void *ps __maybe_unused, const char *group,
+ const char *name, const char *desc,
+ const char *long_desc, const char *expr,
+ const char *unit)
+{
+ struct json_print_state *print_state = ps;
+ bool need_sep = false;
+ struct strbuf buf;
+
+ strbuf_init(&buf, 0);
+ printf("%s{\n", print_state->need_sep ? ",\n" : "");
+ print_state->need_sep = true;
+ if (group) {
+ fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group);
+ need_sep = true;
+ }
+ if (name) {
+ fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name);
+ need_sep = true;
+ }
+ if (expr) {
+ fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr);
+ need_sep = true;
+ }
+ if (unit) {
+ fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit);
+ need_sep = true;
+ }
+ if (desc) {
+ fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
+ desc);
+ need_sep = true;
+ }
+ if (long_desc) {
+ fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
+ long_desc);
+ need_sep = true;
+ }
+ printf("%s}", need_sep ? "\n" : "");
+ strbuf_release(&buf);
+}
int cmd_list(int argc, const char **argv)
{
int i, ret = 0;
- bool raw_dump = false;
- bool long_desc_flag = false;
- bool deprecated = false;
- char *pmu_name = NULL;
+ struct print_state default_ps = {};
+ struct print_state json_ps = {};
+ void *ps = &default_ps;
+ struct print_callbacks print_cb = {
+ .print_start = default_print_start,
+ .print_end = default_print_end,
+ .print_event = default_print_event,
+ .print_metric = default_print_metric,
+ };
+ const char *hybrid_name = NULL;
+ const char *unit_name = NULL;
+ bool json = false;
struct option list_options[] = {
- OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
- OPT_BOOLEAN('d', "desc", &desc_flag,
+ OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"),
+ OPT_BOOLEAN('j', "json", &json, "JSON encode events and metrics"),
+ OPT_BOOLEAN('d', "desc", &default_ps.desc,
"Print extra event descriptions. --no-desc to not print."),
- OPT_BOOLEAN('v', "long-desc", &long_desc_flag,
+ OPT_BOOLEAN('v', "long-desc", &default_ps.long_desc,
"Print longer event descriptions."),
- OPT_BOOLEAN(0, "details", &details_flag,
+ OPT_BOOLEAN(0, "details", &default_ps.detailed,
"Print information on the perf event names and expressions used internally by events."),
- OPT_BOOLEAN(0, "deprecated", &deprecated,
+ OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
"Print deprecated events."),
- OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type",
- "Print events applying cpu with this type for hybrid platform "
- "(e.g. core or atom)"),
+ OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
+ "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
+ OPT_STRING(0, "unit", &unit_name, "PMU name",
+ "Limit PMU or metric printing to the specified PMU."),
OPT_INCR(0, "debug", &verbose,
"Enable debugging output"),
OPT_END()
@@ -53,24 +462,45 @@ int cmd_list(int argc, const char **argv)
};
set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN);
+ /* Hide hybrid flag for the more generic 'unit' flag. */
+ set_option_flag(list_options, 0, "cputype", PARSE_OPT_HIDDEN);
argc = parse_options(argc, argv, list_options, list_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
setup_pager();
- if (!raw_dump && pager_in_use())
- printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
+ if (!default_ps.name_only)
+ setup_pager();
- if (hybrid_type) {
- pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type);
- if (!pmu_name)
- pr_warning("WARNING: hybrid cputype is not supported!\n");
+ if (json) {
+ print_cb = (struct print_callbacks){
+ .print_start = json_print_start,
+ .print_end = json_print_end,
+ .print_event = json_print_event,
+ .print_metric = json_print_metric,
+ };
+ ps = &json_ps;
+ } else {
+ default_ps.desc = !default_ps.long_desc;
+ default_ps.last_topic = strdup("");
+ assert(default_ps.last_topic);
+ default_ps.visited_metrics = strlist__new(NULL, NULL);
+ assert(default_ps.visited_metrics);
+ if (unit_name)
+ default_ps.pmu_glob = strdup(unit_name);
+ else if (hybrid_name) {
+ default_ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name);
+ if (!default_ps.pmu_glob)
+ pr_warning("WARNING: hybrid cputype is not supported!\n");
+ }
}
+ print_cb.print_start(ps);
if (argc == 0) {
- print_events(NULL, raw_dump, !desc_flag, long_desc_flag,
- details_flag, deprecated, pmu_name);
+ default_ps.metrics = true;
+ default_ps.metricgroups = true;
+ print_events(&print_cb, ps);
goto out;
}
@@ -78,68 +508,75 @@ int cmd_list(int argc, const char **argv)
char *sep, *s;
if (strcmp(argv[i], "tracepoint") == 0)
- print_tracepoint_events(NULL, NULL, raw_dump);
+ print_tracepoint_events(&print_cb, ps);
else if (strcmp(argv[i], "hw") == 0 ||
strcmp(argv[i], "hardware") == 0)
- print_symbol_events(NULL, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
+ print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
+ event_symbols_hw, PERF_COUNT_HW_MAX);
else if (strcmp(argv[i], "sw") == 0 ||
strcmp(argv[i], "software") == 0) {
- print_symbol_events(NULL, PERF_TYPE_SOFTWARE,
- event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
- print_tool_events(NULL, raw_dump);
+ print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE,
+ event_symbols_sw, PERF_COUNT_SW_MAX);
+ print_tool_events(&print_cb, ps);
} else if (strcmp(argv[i], "cache") == 0 ||
strcmp(argv[i], "hwcache") == 0)
- print_hwcache_events(NULL, raw_dump);
+ print_hwcache_events(&print_cb, ps);
else if (strcmp(argv[i], "pmu") == 0)
- print_pmu_events(NULL, raw_dump, !desc_flag,
- long_desc_flag, details_flag,
- deprecated, pmu_name);
+ print_pmu_events(&print_cb, ps);
else if (strcmp(argv[i], "sdt") == 0)
- print_sdt_events(NULL, NULL, raw_dump);
- else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0)
- metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name);
- else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0)
- metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name);
- else if ((sep = strchr(argv[i], ':')) != NULL) {
- int sep_idx;
-
- sep_idx = sep - argv[i];
- s = strdup(argv[i]);
- if (s == NULL) {
+ print_sdt_events(&print_cb, ps);
+ else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) {
+ default_ps.metricgroups = false;
+ default_ps.metrics = true;
+ metricgroup__print(&print_cb, ps);
+ } else if (strcmp(argv[i], "metricgroup") == 0 ||
+ strcmp(argv[i], "metricgroups") == 0) {
+ default_ps.metricgroups = true;
+ default_ps.metrics = false;
+ metricgroup__print(&print_cb, ps);
+ } else if ((sep = strchr(argv[i], ':')) != NULL) {
+ char *old_pmu_glob = default_ps.pmu_glob;
+
+ default_ps.event_glob = strdup(argv[i]);
+ if (!default_ps.event_glob) {
ret = -1;
goto out;
}
- s[sep_idx] = '\0';
- print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
- print_sdt_events(s, s + sep_idx + 1, raw_dump);
- metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name);
- free(s);
+ print_tracepoint_events(&print_cb, ps);
+ print_sdt_events(&print_cb, ps);
+ default_ps.metrics = true;
+ default_ps.metricgroups = true;
+ metricgroup__print(&print_cb, ps);
+ zfree(&default_ps.event_glob);
+ default_ps.pmu_glob = old_pmu_glob;
} else {
if (asprintf(&s, "*%s*", argv[i]) < 0) {
printf("Critical: Not enough memory! Trying to continue...\n");
continue;
}
- print_symbol_events(s, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
- print_symbol_events(s, PERF_TYPE_SOFTWARE,
- event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
- print_tool_events(s, raw_dump);
- print_hwcache_events(s, raw_dump);
- print_pmu_events(s, raw_dump, !desc_flag,
- long_desc_flag,
- details_flag,
- deprecated,
- pmu_name);
- print_tracepoint_events(NULL, s, raw_dump);
- print_sdt_events(NULL, s, raw_dump);
- metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name);
+ default_ps.event_glob = s;
+ print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
+ event_symbols_hw, PERF_COUNT_HW_MAX);
+ print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE,
+ event_symbols_sw, PERF_COUNT_SW_MAX);
+ print_tool_events(&print_cb, ps);
+ print_hwcache_events(&print_cb, ps);
+ print_pmu_events(&print_cb, ps);
+ print_tracepoint_events(&print_cb, ps);
+ print_sdt_events(&print_cb, ps);
+ default_ps.metrics = true;
+ default_ps.metricgroups = true;
+ metricgroup__print(&print_cb, ps);
free(s);
}
}
out:
- free(pmu_name);
+ print_cb.print_end(ps);
+ free(default_ps.pmu_glob);
+ free(default_ps.last_topic);
+ free(default_ps.last_metricgroups);
+ strlist__delete(default_ps.visited_metrics);
return ret;
}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 9722d4ab2e55..0d280093b19a 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -24,6 +24,7 @@
#include "util/data.h"
#include "util/string2.h"
#include "util/map.h"
+#include "util/util.h"
#include <sys/types.h>
#include <sys/prctl.h>
@@ -1389,6 +1390,34 @@ static int dump_info(void)
return rc;
}
+static const struct evsel_str_handler lock_tracepoints[] = {
+ { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
+};
+
+static const struct evsel_str_handler contention_tracepoints[] = {
+ { "lock:contention_begin", evsel__process_contention_begin, },
+ { "lock:contention_end", evsel__process_contention_end, },
+};
+
+static int process_event_update(struct perf_tool *tool,
+ union perf_event *event,
+ struct evlist **pevlist)
+{
+ int ret;
+
+ ret = perf_event__process_event_update(tool, event, pevlist);
+ if (ret < 0)
+ return ret;
+
+ /* this can return -EEXIST since we call it for each evsel */
+ perf_session__set_tracepoints_handlers(session, lock_tracepoints);
+ perf_session__set_tracepoints_handlers(session, contention_tracepoints);
+ return 0;
+}
+
typedef int (*tracepoint_handler)(struct evsel *evsel,
struct perf_sample *sample);
@@ -1544,28 +1573,19 @@ next:
print_bad_events(bad, total);
}
-static const struct evsel_str_handler lock_tracepoints[] = {
- { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
- { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
-};
-
-static const struct evsel_str_handler contention_tracepoints[] = {
- { "lock:contention_begin", evsel__process_contention_begin, },
- { "lock:contention_end", evsel__process_contention_end, },
-};
-
static bool force;
static int __cmd_report(bool display_info)
{
int err = -EINVAL;
struct perf_tool eops = {
+ .attr = perf_event__process_attr,
+ .event_update = process_event_update,
.sample = process_sample_event,
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
.namespaces = perf_event__process_namespaces,
+ .tracing_data = perf_event__process_tracing_data,
.ordered_events = true,
};
struct perf_data data = {
@@ -1584,17 +1604,19 @@ static int __cmd_report(bool display_info)
symbol_conf.sort_by_name = true;
symbol__init(&session->header.env);
- if (!perf_session__has_traces(session, "lock record"))
- goto out_delete;
+ if (!data.is_pipe) {
+ if (!perf_session__has_traces(session, "lock record"))
+ goto out_delete;
- if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
- pr_err("Initializing perf session tracepoint handlers failed\n");
- goto out_delete;
- }
+ if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
+ pr_err("Initializing perf session tracepoint handlers failed\n");
+ goto out_delete;
+ }
- if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
- pr_err("Initializing perf session tracepoint handlers failed\n");
- goto out_delete;
+ if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
+ pr_err("Initializing perf session tracepoint handlers failed\n");
+ goto out_delete;
+ }
}
if (setup_output_field(false, output_fields))
@@ -1632,9 +1654,12 @@ static int __cmd_contention(int argc, const char **argv)
{
int err = -EINVAL;
struct perf_tool eops = {
+ .attr = perf_event__process_attr,
+ .event_update = process_event_update,
.sample = process_sample_event,
.comm = perf_event__process_comm,
.mmap = perf_event__process_mmap,
+ .tracing_data = perf_event__process_tracing_data,
.ordered_events = true,
};
struct perf_data data = {
@@ -1697,7 +1722,7 @@ static int __cmd_contention(int argc, const char **argv)
pr_err("lock contention BPF setup failed\n");
goto out_delete;
}
- } else {
+ } else if (!data.is_pipe) {
if (!perf_session__has_traces(session, "lock record"))
goto out_delete;
@@ -1858,6 +1883,29 @@ static int parse_map_entry(const struct option *opt, const char *str,
return 0;
}
+static int parse_max_stack(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ unsigned long *len = (unsigned long *)opt->value;
+ long val;
+ char *endptr;
+
+ errno = 0;
+ val = strtol(str, &endptr, 0);
+ if (*endptr != '\0' || errno != 0) {
+ pr_err("invalid max stack depth: %s\n", str);
+ return -1;
+ }
+
+ if (val < 0 || val > sysctl__max_stack()) {
+ pr_err("invalid max stack depth: %ld\n", val);
+ return -1;
+ }
+
+ *len = val;
+ return 0;
+}
+
int cmd_lock(int argc, const char **argv)
{
const struct option lock_options[] = {
@@ -1869,7 +1917,7 @@ int cmd_lock(int argc, const char **argv)
"file", "vmlinux pathname"),
OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
"file", "kallsyms pathname"),
- OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
OPT_END()
};
@@ -1913,9 +1961,9 @@ int cmd_lock(int argc, const char **argv)
"Trace on existing thread id (exclusive to --pid)"),
OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num",
"Max number of BPF map entries", parse_map_entry),
- OPT_INTEGER(0, "max-stack", &max_stack_depth,
- "Set the maximum stack depth when collecting lock contention, "
- "Default: " __stringify(CONTENTION_STACK_DEPTH)),
+ OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
+ "Set the maximum stack depth when collecting lopck contention, "
+ "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
OPT_INTEGER(0, "stack-skip", &stack_skip,
"Set the number of stack depth to skip when finding a lock caller, "
"Default: " __stringify(CONTENTION_STACK_SKIP)),
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 923fb8316fda..dedd612eae5e 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -20,6 +20,7 @@
#include "util/symbol.h"
#include "util/pmu.h"
#include "util/pmu-hybrid.h"
+#include "util/sample.h"
#include "util/string2.h"
#include <linux/err.h>
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index f62298f5db3b..2ae50fc9e597 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -40,7 +40,6 @@ static struct {
int command; /* Command short_name */
bool list_events;
bool uprobes;
- bool quiet;
bool target_used;
int nevents;
struct perf_probe_event events[MAX_PROBES];
@@ -514,8 +513,8 @@ __cmd_probe(int argc, const char **argv)
struct option options[] = {
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show parsed arguments, etc)"),
- OPT_BOOLEAN('q', "quiet", &params.quiet,
- "be quiet (do not show any messages)"),
+ OPT_BOOLEAN('q', "quiet", &quiet,
+ "be quiet (do not show any warnings or messages)"),
OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
"list up probe events",
opt_set_filter_with_command, DEFAULT_LIST_FILTER),
@@ -634,7 +633,7 @@ __cmd_probe(int argc, const char **argv)
if (ret)
return ret;
- if (params.quiet) {
+ if (quiet) {
if (verbose != 0) {
pr_err(" Error: -v and -q are exclusive.\n");
return -EINVAL;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e128b855ddde..bd462a3f2bbd 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -646,10 +646,10 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
return record__write(rec, map, bf, size);
}
-static volatile int signr = -1;
-static volatile int child_finished;
+static volatile sig_atomic_t signr = -1;
+static volatile sig_atomic_t child_finished;
#ifdef HAVE_EVENTFD_SUPPORT
-static volatile int done_fd = -1;
+static volatile sig_atomic_t done_fd = -1;
#endif
static void sig_handler(int sig)
@@ -1926,7 +1926,7 @@ static void record__read_lost_samples(struct record *rec)
}
-static volatile int workload_exec_errno;
+static volatile sig_atomic_t workload_exec_errno;
/*
* evlist__prepare_workload will send a SIGUSR1
@@ -3388,7 +3388,7 @@ static struct option __record_options[] = {
&record_parse_callchain_opt),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
- OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
"per thread counts"),
OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8361890176c2..b6d77d3da64f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1222,7 +1222,7 @@ int cmd_report(int argc, const char **argv)
"input file name"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
- OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"),
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7ca238277d83..d7ec8c1af293 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2049,7 +2049,7 @@ static void perf_sample__fprint_metric(struct perf_script *script,
u64 val;
if (!evsel->stats)
- evlist__alloc_stats(script->session->evlist, false);
+ evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
if (evsel_script(leader)->gnum++ == 0)
perf_stat__reset_shadow_stats();
val = sample->period * evsel->scale;
@@ -3632,7 +3632,7 @@ static int set_maps(struct perf_script *script)
perf_evlist__set_maps(&evlist->core, script->cpus, script->threads);
- if (evlist__alloc_stats(evlist, true))
+ if (evlist__alloc_stats(&stat_config, evlist, /*alloc_raw=*/true))
return -ENOMEM;
script->allocated = true;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 265b05157972..d040fbcdcc5a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -93,6 +93,7 @@
#include <linux/ctype.h>
#include <perf/evlist.h>
+#include <internal/threadmap.h>
#define DEFAULT_SEPARATOR " "
#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
@@ -173,7 +174,7 @@ static struct target target = {
#define METRIC_ONLY_LEN 20
-static volatile pid_t child_pid = -1;
+static volatile sig_atomic_t child_pid = -1;
static int detailed_run = 0;
static bool transaction_run;
static bool topdown_run = false;
@@ -208,7 +209,7 @@ struct perf_stat {
static struct perf_stat perf_stat;
#define STAT_RECORD perf_stat.record
-static volatile int done = 0;
+static volatile sig_atomic_t done = 0;
static struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
@@ -465,15 +466,19 @@ static int read_bpf_map_counters(void)
return 0;
}
-static void read_counters(struct timespec *rs)
+static int read_counters(struct timespec *rs)
{
- struct evsel *counter;
-
if (!stat_config.stop_read_counter) {
if (read_bpf_map_counters() ||
read_affinity_counters(rs))
- return;
+ return -1;
}
+ return 0;
+}
+
+static void process_counters(void)
+{
+ struct evsel *counter;
evlist__for_each_entry(evsel_list, counter) {
if (counter->err)
@@ -482,6 +487,10 @@ static void read_counters(struct timespec *rs)
pr_warning("failed to process counter %s\n", counter->name);
counter->err = 0;
}
+
+ perf_stat_merge_counters(&stat_config, evsel_list);
+ perf_stat_process_percore(&stat_config, evsel_list);
+ perf_stat_process_shadow_stats(&stat_config, evsel_list);
}
static void process_interval(void)
@@ -492,7 +501,10 @@ static void process_interval(void)
diff_timespec(&rs, &ts, &ref_time);
perf_stat__reset_shadow_per_stat(&rt_stat);
- read_counters(&rs);
+ evlist__reset_aggr_stats(evsel_list);
+
+ if (read_counters(&rs) == 0)
+ process_counters();
if (STAT_RECORD) {
if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
@@ -569,7 +581,7 @@ static void disable_counters(void)
}
}
-static volatile int workload_exec_errno;
+static volatile sig_atomic_t workload_exec_errno;
/*
* evlist__prepare_workload will send a SIGUSR1
@@ -963,11 +975,9 @@ try_again_reset:
init_stats(&walltime_nsecs_stats);
update_stats(&walltime_nsecs_stats, t1 - t0);
- if (stat_config.aggr_mode == AGGR_GLOBAL)
- evlist__save_aggr_prev_raw_counts(evsel_list);
-
evlist__copy_prev_raw_counts(evsel_list);
evlist__reset_prev_raw_counts(evsel_list);
+ evlist__reset_aggr_stats(evsel_list);
perf_stat__reset_shadow_per_stat(&rt_stat);
} else {
update_stats(&walltime_nsecs_stats, t1 - t0);
@@ -980,7 +990,8 @@ try_again_reset:
* avoid arbitrary skew, we must read all counters before closing any
* group leaders.
*/
- read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
+ if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0)
+ process_counters();
/*
* We need to keep evsel_list alive, because it's processed
@@ -1023,13 +1034,13 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
/* Do not print anything if we record to the pipe. */
if (STAT_RECORD && perf_stat.data.is_pipe)
return;
- if (stat_config.quiet)
+ if (quiet)
return;
evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
}
-static volatile int signr = -1;
+static volatile sig_atomic_t signr = -1;
static void skip_signal(int signo)
{
@@ -1273,8 +1284,8 @@ static struct option stat_options[] = {
"print summary for interval mode"),
OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
"don't print 'summary' for CSV summary output"),
- OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
- "don't print output (useful with record)"),
+ OPT_BOOLEAN(0, "quiet", &quiet,
+ "don't print any output, messages or warnings (useful with record)"),
OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
"Only enable events on applying cpu with this type "
"for hybrid platform (e.g. core or atom)",
@@ -1330,10 +1341,26 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __
return aggr_cpu_id__node(cpu, /*data=*/NULL);
}
+static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__global(cpu, /*data=*/NULL);
+}
+
+static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+}
+
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu cpu)
{
- struct aggr_cpu_id id = aggr_cpu_id__empty();
+ struct aggr_cpu_id id;
+
+ /* per-process mode - should use global aggr mode */
+ if (cpu.cpu == -1)
+ return get_id(config, cpu);
if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
@@ -1366,16 +1393,16 @@ static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *co
return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
}
-static bool term_percore_set(void)
+static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config,
+ struct perf_cpu cpu)
{
- struct evsel *counter;
-
- evlist__for_each_entry(evsel_list, counter) {
- if (counter->percore)
- return true;
- }
+ return perf_stat__get_aggr(config, perf_stat__get_global, cpu);
+}
- return false;
+static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config,
+ struct perf_cpu cpu)
+{
+ return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
}
static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
@@ -1390,11 +1417,9 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
case AGGR_NODE:
return aggr_cpu_id__node;
case AGGR_NONE:
- if (term_percore_set())
- return aggr_cpu_id__core;
-
- return NULL;
+ return aggr_cpu_id__cpu;
case AGGR_GLOBAL:
+ return aggr_cpu_id__global;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
@@ -1415,11 +1440,9 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
case AGGR_NODE:
return perf_stat__get_node_cached;
case AGGR_NONE:
- if (term_percore_set()) {
- return perf_stat__get_core_cached;
- }
- return NULL;
+ return perf_stat__get_cpu_cached;
case AGGR_GLOBAL:
+ return perf_stat__get_global_cached;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
@@ -1434,8 +1457,9 @@ static int perf_stat_init_aggr_mode(void)
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
if (get_id) {
+ bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
- get_id, /*data=*/NULL);
+ get_id, /*data=*/NULL, needs_sort);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
@@ -1443,6 +1467,21 @@ static int perf_stat_init_aggr_mode(void)
stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
}
+ if (stat_config.aggr_mode == AGGR_THREAD) {
+ nr = perf_thread_map__nr(evsel_list->core.threads);
+ stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
+ if (stat_config.aggr_map == NULL)
+ return -ENOMEM;
+
+ for (int s = 0; s < nr; s++) {
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ id.thread_idx = s;
+ stat_config.aggr_map->map[s] = id;
+ }
+ return 0;
+ }
+
/*
* The evsel_list->cpus is the base we operate on,
* taking the highest cpu number to be the size of
@@ -1527,6 +1566,26 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo
return id;
}
+static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data)
+{
+ struct perf_env *env = data;
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ if (cpu.cpu != -1) {
+ /*
+ * core_id is relative to socket and die,
+ * we need a global id. So we set
+ * socket, die id and core id
+ */
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
+ id.core = env->cpu[cpu.cpu].core_id;
+ id.cpu = cpu;
+ }
+
+ return id;
+}
+
static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct aggr_cpu_id id = aggr_cpu_id__empty();
@@ -1535,6 +1594,16 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, vo
return id;
}
+static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused,
+ void *data __maybe_unused)
+{
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ /* it always aggregates to the cpu 0 */
+ id.cpu = (struct perf_cpu){ .cpu = 0 };
+ return id;
+}
+
static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
@@ -1552,12 +1621,24 @@ static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *conf
return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
+static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
+static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
{
switch (aggr_mode) {
@@ -1569,8 +1650,10 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
return perf_env__get_core_aggr_by_cpu;
case AGGR_NODE:
return perf_env__get_node_aggr_by_cpu;
- case AGGR_NONE:
case AGGR_GLOBAL:
+ return perf_env__get_global_aggr_by_cpu;
+ case AGGR_NONE:
+ return perf_env__get_cpu_aggr_by_cpu;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
@@ -1590,8 +1673,10 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
return perf_stat__get_core_file;
case AGGR_NODE:
return perf_stat__get_node_file;
- case AGGR_NONE:
case AGGR_GLOBAL:
+ return perf_stat__get_global_file;
+ case AGGR_NONE:
+ return perf_stat__get_cpu_file;
case AGGR_THREAD:
case AGGR_UNSET:
case AGGR_MAX:
@@ -1604,11 +1689,29 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
struct perf_env *env = &st->session->header.env;
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
+ bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
+
+ if (stat_config.aggr_mode == AGGR_THREAD) {
+ int nr = perf_thread_map__nr(evsel_list->core.threads);
+
+ stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
+ if (stat_config.aggr_map == NULL)
+ return -ENOMEM;
+
+ for (int s = 0; s < nr; s++) {
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ id.thread_idx = s;
+ stat_config.aggr_map->map[s] = id;
+ }
+ return 0;
+ }
if (!get_id)
return 0;
- stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env);
+ stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
+ get_id, env, needs_sort);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
@@ -1991,13 +2094,11 @@ static int process_stat_round_event(struct perf_session *session,
union perf_event *event)
{
struct perf_record_stat_round *stat_round = &event->stat_round;
- struct evsel *counter;
struct timespec tsh, *ts = NULL;
const char **argv = session->header.env.cmdline_argv;
int argc = session->header.env.nr_cmdline;
- evlist__for_each_entry(evsel_list, counter)
- perf_stat_process_counter(&stat_config, counter);
+ process_counters();
if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
update_stats(&walltime_nsecs_stats, stat_round->time);
@@ -2024,17 +2125,23 @@ int process_stat_config_event(struct perf_session *session,
if (perf_cpu_map__empty(st->cpus)) {
if (st->aggr_mode != AGGR_UNSET)
pr_warning("warning: processing task data, aggregation mode not set\n");
- return 0;
- }
-
- if (st->aggr_mode != AGGR_UNSET)
+ } else if (st->aggr_mode != AGGR_UNSET) {
stat_config.aggr_mode = st->aggr_mode;
+ }
if (perf_stat.data.is_pipe)
perf_stat_init_aggr_mode();
else
perf_stat_init_aggr_mode_file(st);
+ if (stat_config.aggr_map) {
+ int nr_aggr = stat_config.aggr_map->nr;
+
+ if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) {
+ pr_err("cannot allocate aggr counts\n");
+ return -1;
+ }
+ }
return 0;
}
@@ -2048,7 +2155,7 @@ static int set_maps(struct perf_stat *st)
perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
- if (evlist__alloc_stats(evsel_list, true))
+ if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true))
return -ENOMEM;
st->maps_allocated = true;
@@ -2277,7 +2384,7 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
- if (!output && !stat_config.quiet) {
+ if (!output && !quiet) {
struct timespec tm;
mode = append_file ? "a" : "w";
@@ -2297,6 +2404,14 @@ int cmd_stat(int argc, const char **argv)
}
}
+ if (stat_config.interval_clear && !isatty(fileno(output))) {
+ fprintf(stderr, "--interval-clear does not work with output\n");
+ parse_options_usage(stat_usage, stat_options, "o", 1);
+ parse_options_usage(NULL, stat_options, "log-fd", 0);
+ parse_options_usage(NULL, stat_options, "interval-clear", 0);
+ return -1;
+ }
+
stat_config.output = output;
/*
@@ -2495,10 +2610,10 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
- if (evlist__alloc_stats(evsel_list, interval))
+ if (perf_stat_init_aggr_mode())
goto out;
- if (perf_stat_init_aggr_mode())
+ if (evlist__alloc_stats(&stat_config, evsel_list, interval))
goto out;
/*
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 4b3ff7687236..bb5bd241246b 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -87,8 +87,8 @@
#include <linux/ctype.h>
#include <perf/mmap.h>
-static volatile int done;
-static volatile int resize;
+static volatile sig_atomic_t done;
+static volatile sig_atomic_t resize;
#define HEADER_LINE_NR 5
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d3c757769b96..543c379d2a57 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -88,6 +88,8 @@
# define F_LINUX_SPECIFIC_BASE 1024
#endif
+#define RAW_SYSCALL_ARGS_NUM 6
+
/*
* strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
*/
@@ -108,7 +110,7 @@ struct syscall_fmt {
const char *sys_enter,
*sys_exit;
} bpf_prog_name;
- struct syscall_arg_fmt arg[6];
+ struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM];
u8 nr_args;
bool errpid;
bool timeout;
@@ -120,7 +122,6 @@ struct trace {
struct syscalltbl *sctbl;
struct {
struct syscall *table;
- struct bpf_map *map;
struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
struct bpf_map *sys_enter,
*sys_exit;
@@ -924,6 +925,8 @@ static struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
{ .name = "clock_gettime",
.arg = { [0] = STRARRAY(clk_id, clockid), }, },
+ { .name = "clock_nanosleep",
+ .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp */ }, }, },
{ .name = "clone", .errpid = true, .nr_args = 5,
.arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
[1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
@@ -1053,7 +1056,8 @@ static struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
[2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
{ .name = "perf_event_open",
- .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
+ .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ },
+ [2] = { .scnprintf = SCA_INT, /* cpu */ },
[3] = { .scnprintf = SCA_FD, /* group_fd */ },
[4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
{ .name = "pipe2",
@@ -1220,16 +1224,6 @@ struct syscall {
};
/*
- * Must match what is in the BPF program:
- *
- * tools/perf/examples/bpf/augmented_raw_syscalls.c
- */
-struct bpf_map_syscall_entry {
- bool enabled;
- u16 string_args_len[6];
-};
-
-/*
* We need to have this 'calculated' boolean because in some cases we really
* don't know what is the duration of a syscall, for instance, when we start
* a session and some threads are waiting for a syscall to finish, say 'poll',
@@ -1535,8 +1529,8 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
}
static pid_t workload_pid = -1;
-static bool done = false;
-static bool interrupted = false;
+static volatile sig_atomic_t done = false;
+static volatile sig_atomic_t interrupted = false;
static void sighandler_interrupt(int sig __maybe_unused)
{
@@ -1658,7 +1652,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
{
int idx;
- if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
+ if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0)
nr_args = sc->fmt->nr_args;
sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
@@ -1791,11 +1785,11 @@ static int trace__read_syscall_info(struct trace *trace, int id)
#endif
sc = trace->syscalls.table + id;
if (sc->nonexistent)
- return 0;
+ return -EEXIST;
if (name == NULL) {
sc->nonexistent = true;
- return 0;
+ return -EEXIST;
}
sc->name = name;
@@ -1809,11 +1803,18 @@ static int trace__read_syscall_info(struct trace *trace, int id)
sc->tp_format = trace_event__tp_format("syscalls", tp_name);
}
- if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
- return -ENOMEM;
-
- if (IS_ERR(sc->tp_format))
+ /*
+ * Fails to read trace point format via sysfs node, so the trace point
+ * doesn't exist. Set the 'nonexistent' flag as true.
+ */
+ if (IS_ERR(sc->tp_format)) {
+ sc->nonexistent = true;
return PTR_ERR(sc->tp_format);
+ }
+
+ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
+ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
+ return -ENOMEM;
sc->args = sc->tp_format->format.fields;
/*
@@ -2131,11 +2132,8 @@ static struct syscall *trace__syscall_info(struct trace *trace,
(err = trace__read_syscall_info(trace, id)) != 0)
goto out_cant_read;
- if (trace->syscalls.table[id].name == NULL) {
- if (trace->syscalls.table[id].nonexistent)
- return NULL;
+ if (trace->syscalls.table && trace->syscalls.table[id].nonexistent)
goto out_cant_read;
- }
return &trace->syscalls.table[id];
@@ -3250,7 +3248,6 @@ static void trace__set_bpf_map_filtered_pids(struct trace *trace)
static void trace__set_bpf_map_syscalls(struct trace *trace)
{
- trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
}
@@ -3330,80 +3327,6 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
}
-static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
-{
- struct syscall *sc = trace__syscall_info(trace, NULL, id);
- int arg = 0;
-
- if (sc == NULL)
- goto out;
-
- for (; arg < sc->nr_args; ++arg) {
- entry->string_args_len[arg] = 0;
- if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
- /* Should be set like strace -s strsize */
- entry->string_args_len[arg] = PATH_MAX;
- }
- }
-out:
- for (; arg < 6; ++arg)
- entry->string_args_len[arg] = 0;
-}
-static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
-{
- int fd = bpf_map__fd(trace->syscalls.map);
- struct bpf_map_syscall_entry value = {
- .enabled = !trace->not_ev_qualifier,
- };
- int err = 0;
- size_t i;
-
- for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
- int key = trace->ev_qualifier_ids.entries[i];
-
- if (value.enabled) {
- trace__init_bpf_map_syscall_args(trace, key, &value);
- trace__init_syscall_bpf_progs(trace, key);
- }
-
- err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
- if (err)
- break;
- }
-
- return err;
-}
-
-static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
-{
- int fd = bpf_map__fd(trace->syscalls.map);
- struct bpf_map_syscall_entry value = {
- .enabled = enabled,
- };
- int err = 0, key;
-
- for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
- if (enabled)
- trace__init_bpf_map_syscall_args(trace, key, &value);
-
- err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
- if (err)
- break;
- }
-
- return err;
-}
-
-static int trace__init_syscalls_bpf_map(struct trace *trace)
-{
- bool enabled = true;
-
- if (trace->ev_qualifier_ids.nr)
- enabled = trace->not_ev_qualifier;
-
- return __trace__init_syscalls_bpf_map(trace, enabled);
-}
-
static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
{
struct tep_format_field *field, *candidate_field;
@@ -3618,16 +3541,6 @@ static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
{
}
-static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
-{
- return 0;
-}
-
-static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
-{
- return 0;
-}
-
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
const char *name __maybe_unused)
{
@@ -3661,8 +3574,6 @@ static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
static int trace__set_ev_qualifier_filter(struct trace *trace)
{
- if (trace->syscalls.map)
- return trace__set_ev_qualifier_bpf_filter(trace);
if (trace->syscalls.events.sys_enter)
return trace__set_ev_qualifier_tp_filter(trace);
return 0;
@@ -4036,9 +3947,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
if (err < 0)
goto out_error_mem;
- if (trace->syscalls.map)
- trace__init_syscalls_bpf_map(trace);
-
if (trace->syscalls.prog_array.sys_enter)
trace__init_syscalls_bpf_prog_array_maps(trace);
@@ -4092,8 +4000,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
}
trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
- evlist->core.threads->nr > 1 ||
- evlist__first(evlist)->core.attr.inherit;
+ perf_thread_map__nr(evlist->core.threads) > 1 ||
+ evlist__first(evlist)->core.attr.inherit;
/*
* Now that we already used evsel->core.attr to ask the kernel to setup the
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
index e6b6181c6dc6..3bd7fc17631f 100644
--- a/tools/perf/examples/bpf/5sec.c
+++ b/tools/perf/examples/bpf/5sec.c
@@ -39,13 +39,15 @@
Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
*/
-#include <bpf.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
#define NSEC_PER_SEC 1000000000L
-int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec)
+SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp")
+int hrtimer_nanosleep(void *ctx, int err, long long sec)
{
return sec / NSEC_PER_SEC == 5ULL;
}
-license(GPL);
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index a262dcd020f4..9a03189d33d3 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -14,39 +14,52 @@
* code that will combine entry/exit in a strace like way.
*/
-#include <unistd.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
#include <linux/limits.h>
-#include <linux/socket.h>
-#include <pid_filter.h>
-/* bpf-output associated map */
-bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
+// FIXME: These should come from system headers
+typedef char bool;
+typedef int pid_t;
+typedef long long int __s64;
+typedef __s64 time64_t;
-/*
- * string_args_len: one per syscall arg, 0 means not a string or don't copy it,
- * PATH_MAX for copying everything, any other value to limit
- * it a la 'strace -s strsize'.
- */
-struct syscall {
- bool enabled;
- u16 string_args_len[6];
+struct timespec64 {
+ time64_t tv_sec;
+ long int tv_nsec;
};
-bpf_map(syscalls, ARRAY, int, struct syscall, 512);
+/* bpf-output associated map */
+struct __augmented_syscalls__ {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, int);
+ __type(value, __u32);
+ __uint(max_entries, __NR_CPUS__);
+} __augmented_syscalls__ SEC(".maps");
/*
* What to augment at entry?
*
* Pointer arg payloads (filenames, etc) passed from userspace to the kernel
*/
-bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512);
+struct syscalls_sys_enter {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 512);
+} syscalls_sys_enter SEC(".maps");
/*
* What to augment at exit?
*
* Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
*/
-bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512);
+struct syscalls_sys_exit {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 512);
+} syscalls_sys_exit SEC(".maps");
struct syscall_enter_args {
unsigned long long common_tp_fields;
@@ -66,7 +79,38 @@ struct augmented_arg {
char value[PATH_MAX];
};
-pid_filter(pids_filtered);
+struct pids_filtered {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, pid_t);
+ __type(value, bool);
+ __uint(max_entries, 64);
+} pids_filtered SEC(".maps");
+
+/*
+ * Desired design of maximum size and alignment (see RFC2553)
+ */
+#define SS_MAXSIZE 128 /* Implementation specific max size */
+
+typedef unsigned short sa_family_t;
+
+/*
+ * FIXME: Should come from system headers
+ *
+ * The definition uses anonymous union and struct in order to control the
+ * default alignment.
+ */
+struct sockaddr_storage {
+ union {
+ struct {
+ sa_family_t ss_family; /* address family */
+ /* Following field(s) are implementation specific */
+ char __data[SS_MAXSIZE - sizeof(unsigned short)];
+ /* space to achieve desired size, */
+ /* _SS_MAXSIZE value minus size of ss_family */
+ };
+ void *__align; /* implementation specific desired alignment */
+ };
+};
struct augmented_args_payload {
struct syscall_enter_args args;
@@ -75,11 +119,17 @@ struct augmented_args_payload {
struct augmented_arg arg, arg2;
};
struct sockaddr_storage saddr;
+ char __data[sizeof(struct augmented_arg)];
};
};
// We need more tmp space than the BPF stack can give us
-bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1);
+struct augmented_args_tmp {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct augmented_args_payload);
+ __uint(max_entries, 1);
+} augmented_args_tmp SEC(".maps");
static inline struct augmented_args_payload *augmented_args_payload(void)
{
@@ -90,14 +140,14 @@ static inline struct augmented_args_payload *augmented_args_payload(void)
static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
{
/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
- return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
+ return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
}
static inline
unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
{
unsigned int augmented_len = sizeof(*augmented_arg);
- int string_len = probe_read_str(&augmented_arg->value, arg_len, arg);
+ int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);
augmented_arg->size = augmented_arg->err = 0;
/*
@@ -146,7 +196,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
if (socklen > sizeof(augmented_args->saddr))
socklen = sizeof(augmented_args->saddr);
- probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
return augmented__output(args, augmented_args, len + socklen);
}
@@ -165,7 +215,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)
if (socklen > sizeof(augmented_args->saddr))
socklen = sizeof(augmented_args->saddr);
- probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+ bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
return augmented__output(args, augmented_args, len + socklen);
}
@@ -234,6 +284,80 @@ int sys_enter_renameat(struct syscall_enter_args *args)
return augmented__output(args, augmented_args, len);
}
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+
+// we need just the start, get the size to then copy it
+struct perf_event_attr_size {
+ __u32 type;
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
+};
+
+SEC("!syscalls:sys_enter_perf_event_open")
+int sys_enter_perf_event_open(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
+ unsigned int len = sizeof(augmented_args->args);
+
+ if (augmented_args == NULL)
+ goto failure;
+
+ if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
+ goto failure;
+
+ attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
+
+ __u32 size = attr_read->size;
+
+ if (!size)
+ size = PERF_ATTR_SIZE_VER0;
+
+ if (size > sizeof(augmented_args->__data))
+ goto failure;
+
+ // Now that we read attr->size and tested it against the size limits, read it completely
+ if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
+ goto failure;
+
+ return augmented__output(args, augmented_args, len + size);
+failure:
+ return 1; /* Failure: don't filter */
+}
+
+SEC("!syscalls:sys_enter_clock_nanosleep")
+int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
+{
+ struct augmented_args_payload *augmented_args = augmented_args_payload();
+ const void *rqtp_arg = (const void *)args->args[2];
+ unsigned int len = sizeof(augmented_args->args);
+ __u32 size = sizeof(struct timespec64);
+
+ if (augmented_args == NULL)
+ goto failure;
+
+ if (size > sizeof(augmented_args->__data))
+ goto failure;
+
+ bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
+
+ return augmented__output(args, augmented_args, len + size);
+failure:
+ return 1; /* Failure: don't filter */
+}
+
+static pid_t getpid(void)
+{
+ return bpf_get_current_pid_tgid();
+}
+
+static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
+{
+ return bpf_map_lookup_elem(pids, &pid) != NULL;
+}
+
SEC("raw_syscalls:sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
@@ -248,7 +372,6 @@ int sys_enter(struct syscall_enter_args *args)
* initial, non-augmented raw_syscalls:sys_enter payload.
*/
unsigned int len = sizeof(augmented_args->args);
- struct syscall *syscall;
if (pid_filter__has(&pids_filtered, getpid()))
return 0;
@@ -257,7 +380,7 @@ int sys_enter(struct syscall_enter_args *args)
if (augmented_args == NULL)
return 1;
- probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
+ bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
/*
* Jump to syscall specific augmenter, even if the default one,
@@ -278,7 +401,7 @@ int sys_exit(struct syscall_exit_args *args)
if (pid_filter__has(&pids_filtered, getpid()))
return 0;
- probe_read(&exit_args, sizeof(exit_args), args);
+ bpf_probe_read(&exit_args, sizeof(exit_args), args);
/*
* Jump to syscall specific return augmenter, even if the default one,
* "!raw_syscalls:unaugmented" that will just return 1 to return the
@@ -291,4 +414,4 @@ int sys_exit(struct syscall_exit_args *args)
return 0;
}
-license(GPL);
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
deleted file mode 100644
index 524fdb8534b3..000000000000
--- a/tools/perf/examples/bpf/augmented_syscalls.c
+++ /dev/null
@@ -1,169 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Augment syscalls with the contents of the pointer arguments.
- *
- * Test it with:
- *
- * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
- *
- * It'll catch some openat syscalls related to the dynamic linked and
- * the last one should be the one for '/etc/passwd'.
- *
- * This matches what is marshalled into the raw_syscall:sys_enter payload
- * expected by the 'perf trace' beautifiers, and can be used by them, that will
- * check if perf_sample->raw_data is more than what is expected for each
- * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the
- * contents of pointer arguments.
- */
-
-#include <stdio.h>
-#include <linux/socket.h>
-
-/* bpf-output associated map */
-bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
-
-struct syscall_exit_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- long ret;
-};
-
-struct augmented_filename {
- unsigned int size;
- int reserved;
- char value[256];
-};
-
-#define augmented_filename_syscall(syscall) \
-struct augmented_enter_##syscall##_args { \
- struct syscall_enter_##syscall##_args args; \
- struct augmented_filename filename; \
-}; \
-int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
-{ \
- struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \
- unsigned int len = sizeof(augmented_args); \
- probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
- augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \
- sizeof(augmented_args.filename.value), \
- args->filename_ptr); \
- if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \
- len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \
- len &= sizeof(augmented_args.filename.value) - 1; \
- } \
- /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
- return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
- &augmented_args, len); \
-} \
-int syscall_exit(syscall)(struct syscall_exit_args *args) \
-{ \
- return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
-}
-
-struct syscall_enter_openat_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- long dfd;
- char *filename_ptr;
- long flags;
- long mode;
-};
-
-augmented_filename_syscall(openat);
-
-struct syscall_enter_open_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- char *filename_ptr;
- long flags;
- long mode;
-};
-
-augmented_filename_syscall(open);
-
-struct syscall_enter_inotify_add_watch_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- long fd;
- char *filename_ptr;
- long mask;
-};
-
-augmented_filename_syscall(inotify_add_watch);
-
-struct statbuf;
-
-struct syscall_enter_newstat_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- char *filename_ptr;
- struct stat *statbuf;
-};
-
-augmented_filename_syscall(newstat);
-
-#ifndef _K_SS_MAXSIZE
-#define _K_SS_MAXSIZE 128
-#endif
-
-#define augmented_sockaddr_syscall(syscall) \
-struct augmented_enter_##syscall##_args { \
- struct syscall_enter_##syscall##_args args; \
- struct sockaddr_storage addr; \
-}; \
-int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
-{ \
- struct augmented_enter_##syscall##_args augmented_args; \
- unsigned long addrlen = sizeof(augmented_args.addr); \
- probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
-/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \
-/* if (addrlen > augmented_args.args.addrlen) */ \
-/* addrlen = augmented_args.args.addrlen; */ \
-/* */ \
- probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \
- /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
- return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
- &augmented_args, \
- sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\
-} \
-int syscall_exit(syscall)(struct syscall_exit_args *args) \
-{ \
- return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
-}
-
-struct sockaddr;
-
-struct syscall_enter_bind_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- long fd;
- struct sockaddr *addr_ptr;
- unsigned long addrlen;
-};
-
-augmented_sockaddr_syscall(bind);
-
-struct syscall_enter_connect_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- long fd;
- struct sockaddr *addr_ptr;
- unsigned long addrlen;
-};
-
-augmented_sockaddr_syscall(connect);
-
-struct syscall_enter_sendto_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- long fd;
- void *buff;
- long len;
- unsigned long flags;
- struct sockaddr *addr_ptr;
- long addr_len;
-};
-
-augmented_sockaddr_syscall(sendto);
-
-license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
index 7d7fb0c9fe76..3e296c0c53d7 100644
--- a/tools/perf/examples/bpf/empty.c
+++ b/tools/perf/examples/bpf/empty.c
@@ -1,3 +1,12 @@
-#include <bpf/bpf.h>
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
-license(GPL);
+struct syscall_enter_args;
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
deleted file mode 100644
index e81b535346c0..000000000000
--- a/tools/perf/examples/bpf/etcsnoop.c
+++ /dev/null
@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Augment the filename syscalls with the contents of the filename pointer argument
- * filtering only those that do not start with /etc/.
- *
- * Test it with:
- *
- * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
- *
- * It'll catch some openat syscalls related to the dynamic linked and
- * the last one should be the one for '/etc/passwd'.
- *
- * This matches what is marshalled into the raw_syscall:sys_enter payload
- * expected by the 'perf trace' beautifiers, and can be used by them unmodified,
- * which will be done as that feature is implemented in the next csets, for now
- * it will appear in a dump done by the default tracepoint handler in 'perf trace',
- * that uses bpf_output__fprintf() to just dump those contents, as done with
- * the bpf-output event associated with the __bpf_output__ map declared in
- * tools/perf/include/bpf/stdio.h.
- */
-
-#include <stdio.h>
-
-/* bpf-output associated map */
-bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
-
-struct augmented_filename {
- int size;
- int reserved;
- char value[64];
-};
-
-#define augmented_filename_syscall_enter(syscall) \
-struct augmented_enter_##syscall##_args { \
- struct syscall_enter_##syscall##_args args; \
- struct augmented_filename filename; \
-}; \
-int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
-{ \
- char etc[6] = "/etc/"; \
- struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \
- probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
- augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \
- sizeof(augmented_args.filename.value), \
- args->filename_ptr); \
- if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \
- return 0; \
- /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
- return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
- &augmented_args, \
- (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \
- augmented_args.filename.size)); \
-}
-
-struct syscall_enter_openat_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- long dfd;
- char *filename_ptr;
- long flags;
- long mode;
-};
-
-augmented_filename_syscall_enter(openat);
-
-struct syscall_enter_open_args {
- unsigned long long common_tp_fields;
- long syscall_nr;
- char *filename_ptr;
- long flags;
- long mode;
-};
-
-augmented_filename_syscall_enter(open);
-
-license(GPL);
diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c
index cf3c2fdc7f79..e9080b0df158 100644
--- a/tools/perf/examples/bpf/hello.c
+++ b/tools/perf/examples/bpf/hello.c
@@ -1,9 +1,27 @@
-#include <stdio.h>
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
-int syscall_enter(openat)(void *args)
+struct __bpf_stdout__ {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, int);
+ __type(value, __u32);
+ __uint(max_entries, __NR_CPUS__);
+} __bpf_stdout__ SEC(".maps");
+
+#define puts(from) \
+ ({ const int __len = sizeof(from); \
+ char __from[sizeof(from)] = from; \
+ bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \
+ &__from, __len & (sizeof(from) - 1)); })
+
+struct syscall_enter_args;
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
{
puts("Hello, world\n");
return 0;
}
-license(GPL);
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
deleted file mode 100644
index b422aeef5339..000000000000
--- a/tools/perf/include/bpf/bpf.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#ifndef _PERF_BPF_H
-#define _PERF_BPF_H
-
-#include <uapi/linux/bpf.h>
-
-/*
- * A helper structure used by eBPF C program to describe map attributes to
- * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h:
- */
-struct bpf_map {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
- unsigned int inner_map_idx;
- unsigned int numa_node;
-};
-
-#define bpf_map(name, _type, type_key, type_val, _max_entries) \
-struct bpf_map SEC("maps") name = { \
- .type = BPF_MAP_TYPE_##_type, \
- .key_size = sizeof(type_key), \
- .value_size = sizeof(type_val), \
- .max_entries = _max_entries, \
-}; \
-struct ____btf_map_##name { \
- type_key key; \
- type_val value; \
-}; \
-struct ____btf_map_##name __attribute__((section(".maps." #name), used)) \
- ____btf_map_##name = { }
-
-/*
- * FIXME: this should receive .max_entries as a parameter, as careful
- * tuning of these limits is needed to avoid hitting limits that
- * prevents other BPF constructs, such as tracepoint handlers,
- * to get installed, with cryptic messages from libbpf, etc.
- * For the current need, 'perf trace --filter-pids', 64 should
- * be good enough, but this surely needs to be revisited.
- */
-#define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64)
-
-static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem;
-static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem;
-
-static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *)BPF_FUNC_tail_call;
-
-#define SEC(NAME) __attribute__((section(NAME), used))
-
-#define probe(function, vars) \
- SEC(#function "=" #function " " #vars) function
-
-#define syscall_enter(name) \
- SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name
-
-#define syscall_exit(name) \
- SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name
-
-#define license(name) \
-char _license[] SEC("license") = #name; \
-int _version SEC("version") = LINUX_VERSION_CODE;
-
-static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read;
-static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str;
-
-static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output;
-
-#endif /* _PERF_BPF_H */
diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h
deleted file mode 100644
index 7f844568dab8..000000000000
--- a/tools/perf/include/bpf/linux/socket.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_SOCKET_H
-#define _UAPI_LINUX_SOCKET_H
-
-/*
- * Desired design of maximum size and alignment (see RFC2553)
- */
-#define _K_SS_MAXSIZE 128 /* Implementation specific max size */
-#define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *))
- /* Implementation specific desired alignment */
-
-typedef unsigned short __kernel_sa_family_t;
-
-struct __kernel_sockaddr_storage {
- __kernel_sa_family_t ss_family; /* address family */
- /* Following field(s) are implementation specific */
- char __data[_K_SS_MAXSIZE - sizeof(unsigned short)];
- /* space to achieve desired size, */
- /* _SS_MAXSIZE value minus size of ss_family */
-} __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */
-
-#define sockaddr_storage __kernel_sockaddr_storage
-
-#endif /* _UAPI_LINUX_SOCKET_H */
diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h
deleted file mode 100644
index 6e61c4bdf548..000000000000
--- a/tools/perf/include/bpf/pid_filter.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-
-#ifndef _PERF_BPF_PID_FILTER_
-#define _PERF_BPF_PID_FILTER_
-
-#include <bpf.h>
-
-#define pid_filter(name) pid_map(name, bool)
-
-static int pid_filter__add(struct bpf_map *pids, pid_t pid)
-{
- bool value = true;
- return bpf_map_update_elem(pids, &pid, &value, BPF_NOEXIST);
-}
-
-static bool pid_filter__has(struct bpf_map *pids, pid_t pid)
-{
- return bpf_map_lookup_elem(pids, &pid) != NULL;
-}
-
-#endif // _PERF_BPF_PID_FILTER_
diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h
deleted file mode 100644
index 316af5b2ff35..000000000000
--- a/tools/perf/include/bpf/stdio.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <bpf.h>
-
-struct bpf_map SEC("maps") __bpf_stdout__ = {
- .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(u32),
- .max_entries = __NR_CPUS__,
-};
-
-#define puts(from) \
- ({ const int __len = sizeof(from); \
- char __from[__len] = from; \
- perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \
- &__from, __len & (sizeof(from) - 1)); })
diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h
deleted file mode 100644
index ca7877f9a976..000000000000
--- a/tools/perf/include/bpf/unistd.h
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-
-#include <bpf.h>
-
-static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid;
-
-static pid_t getpid(void)
-{
- return bpf_get_current_pid_tgid();
-}
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json
index 79f2016c53b0..79f2016c53b0 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
index 579c1c993d17..579c1c993d17 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json
index 0141f749bff3..0141f749bff3 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
index 344a2d552ad5..344a2d552ad5 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json
index e57cd55937c6..e57cd55937c6 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
index 7b2b21ac150f..7b2b21ac150f 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json
index f9fae15f7555..f9fae15f7555 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
index 20f2165c85fe..20f2165c85fe 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
index 3116135c59e2..3116135c59e2 100644
--- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index ad502d00f460..f134e833c069 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -34,7 +34,8 @@
0x00000000410fd460,v1,arm/cortex-a510,core
0x00000000410fd470,v1,arm/cortex-a710,core
0x00000000410fd480,v1,arm/cortex-x2,core
-0x00000000410fd490,v1,arm/neoverse-n2,core
+0x00000000410fd490,v1,arm/neoverse-n2-v2,core
+0x00000000410fd4f0,v1,arm/neoverse-n2-v2,core
0x00000000420f5160,v1,cavium/thunderx2,core
0x00000000430f0af0,v1,cavium/thunderx2,core
0x00000000460f0010,v1,fujitsu/a64fx,core
diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv
new file mode 100644
index 000000000000..c61b3d6ef616
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv
@@ -0,0 +1,17 @@
+# Format:
+# MVENDORID-MARCHID-MIMPID,Version,JSON/file/pathname,Type
+#
+# where
+# MVENDORID JEDEC code of the core provider
+# MARCHID base microarchitecture of the hart
+# MIMPID unique encoding of the version
+# of the processor implementation
+# Version could be used to track version of JSON file
+# but currently unused.
+# JSON/file/pathname is the path to JSON file, relative
+# to tools/perf/pmu-events/arch/riscv/.
+# Type is core, uncore etc
+#
+#
+#MVENDORID-MARCHID-MIMPID,Version,Filename,EventType
+0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core
diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
new file mode 100644
index 000000000000..a9939823b14b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
@@ -0,0 +1,134 @@
+[
+ {
+ "PublicDescription": "Misaligned load trap",
+ "ConfigCode": "0x8000000000000000",
+ "EventName": "FW_MISALIGNED_LOAD",
+ "BriefDescription": "Misaligned load trap event"
+ },
+ {
+ "PublicDescription": "Misaligned store trap",
+ "ConfigCode": "0x8000000000000001",
+ "EventName": "FW_MISALIGNED_STORE",
+ "BriefDescription": "Misaligned store trap event"
+ },
+ {
+ "PublicDescription": "Load access trap",
+ "ConfigCode": "0x8000000000000002",
+ "EventName": "FW_ACCESS_LOAD",
+ "BriefDescription": "Load access trap event"
+ },
+ {
+ "PublicDescription": "Store access trap",
+ "ConfigCode": "0x8000000000000003",
+ "EventName": "FW_ACCESS_STORE",
+ "BriefDescription": "Store access trap event"
+ },
+ {
+ "PublicDescription": "Illegal instruction trap",
+ "ConfigCode": "0x8000000000000004",
+ "EventName": "FW_ILLEGAL_INSN",
+ "BriefDescription": "Illegal instruction trap event"
+ },
+ {
+ "PublicDescription": "Set timer event",
+ "ConfigCode": "0x8000000000000005",
+ "EventName": "FW_SET_TIMER",
+ "BriefDescription": "Set timer event"
+ },
+ {
+ "PublicDescription": "Sent IPI to other HART event",
+ "ConfigCode": "0x8000000000000006",
+ "EventName": "FW_IPI_SENT",
+ "BriefDescription": "Sent IPI to other HART event"
+ },
+ {
+ "PublicDescription": "Received IPI from other HART event",
+ "ConfigCode": "0x8000000000000007",
+ "EventName": "FW_IPI_RECEIVED",
+ "BriefDescription": "Received IPI from other HART event"
+ },
+ {
+ "PublicDescription": "Sent FENCE.I request to other HART event",
+ "ConfigCode": "0x8000000000000008",
+ "EventName": "FW_FENCE_I_SENT",
+ "BriefDescription": "Sent FENCE.I request to other HART event"
+ },
+ {
+ "PublicDescription": "Received FENCE.I request from other HART event",
+ "ConfigCode": "0x8000000000000009",
+ "EventName": "FW_FENCE_I_RECEIVED",
+ "BriefDescription": "Received FENCE.I request from other HART event"
+ },
+ {
+ "PublicDescription": "Sent SFENCE.VMA request to other HART event",
+ "ConfigCode": "0x800000000000000a",
+ "EventName": "FW_SFENCE_VMA_SENT",
+ "BriefDescription": "Sent SFENCE.VMA request to other HART event"
+ },
+ {
+ "PublicDescription": "Received SFENCE.VMA request from other HART event",
+ "ConfigCode": "0x800000000000000b",
+ "EventName": "FW_SFENCE_VMA_RECEIVED",
+ "BriefDescription": "Received SFENCE.VMA request from other HART event"
+ },
+ {
+ "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event",
+ "ConfigCode": "0x800000000000000c",
+ "EventName": "FW_SFENCE_VMA_RECEIVED",
+ "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event"
+ },
+ {
+ "PublicDescription": "Received SFENCE.VMA with ASID request from other HART event",
+ "ConfigCode": "0x800000000000000d",
+ "EventName": "FW_SFENCE_VMA_ASID_RECEIVED",
+ "BriefDescription": "Received SFENCE.VMA with ASID request from other HART event"
+ },
+ {
+ "PublicDescription": "Sent HFENCE.GVMA request to other HART event",
+ "ConfigCode": "0x800000000000000e",
+ "EventName": "FW_HFENCE_GVMA_SENT",
+ "BriefDescription": "Sent HFENCE.GVMA request to other HART event"
+ },
+ {
+ "PublicDescription": "Received HFENCE.GVMA request from other HART event",
+ "ConfigCode": "0x800000000000000f",
+ "EventName": "FW_HFENCE_GVMA_RECEIVED",
+ "BriefDescription": "Received HFENCE.GVMA request from other HART event"
+ },
+ {
+ "PublicDescription": "Sent HFENCE.GVMA with VMID request to other HART event",
+ "ConfigCode": "0x8000000000000010",
+ "EventName": "FW_HFENCE_GVMA_VMID_SENT",
+ "BriefDescription": "Sent HFENCE.GVMA with VMID request to other HART event"
+ },
+ {
+ "PublicDescription": "Received HFENCE.GVMA with VMID request from other HART event",
+ "ConfigCode": "0x8000000000000011",
+ "EventName": "FW_HFENCE_GVMA_VMID_RECEIVED",
+ "BriefDescription": "Received HFENCE.GVMA with VMID request from other HART event"
+ },
+ {
+ "PublicDescription": "Sent HFENCE.VVMA request to other HART event",
+ "ConfigCode": "0x8000000000000012",
+ "EventName": "FW_HFENCE_VVMA_SENT",
+ "BriefDescription": "Sent HFENCE.VVMA request to other HART event"
+ },
+ {
+ "PublicDescription": "Received HFENCE.VVMA request from other HART event",
+ "ConfigCode": "0x8000000000000013",
+ "EventName": "FW_HFENCE_VVMA_RECEIVED",
+ "BriefDescription": "Received HFENCE.VVMA request from other HART event"
+ },
+ {
+ "PublicDescription": "Sent HFENCE.VVMA with ASID request to other HART event",
+ "ConfigCode": "0x8000000000000014",
+ "EventName": "FW_HFENCE_VVMA_ASID_SENT",
+ "BriefDescription": "Sent HFENCE.VVMA with ASID request to other HART event"
+ },
+ {
+ "PublicDescription": "Received HFENCE.VVMA with ASID request from other HART event",
+ "ConfigCode": "0x8000000000000015",
+ "EventName": "FW_HFENCE_VVMA_ASID_RECEIVED",
+ "BriefDescription": "Received HFENCE.VVMA with ASID request from other HART event"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
new file mode 100644
index 000000000000..9b4a032186a7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
@@ -0,0 +1,68 @@
+[
+ {
+ "ArchStdEvent": "FW_MISALIGNED_LOAD"
+ },
+ {
+ "ArchStdEvent": "FW_MISALIGNED_STORE"
+ },
+ {
+ "ArchStdEvent": "FW_ACCESS_LOAD"
+ },
+ {
+ "ArchStdEvent": "FW_ACCESS_STORE"
+ },
+ {
+ "ArchStdEvent": "FW_ILLEGAL_INSN"
+ },
+ {
+ "ArchStdEvent": "FW_SET_TIMER"
+ },
+ {
+ "ArchStdEvent": "FW_IPI_SENT"
+ },
+ {
+ "ArchStdEvent": "FW_IPI_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_FENCE_I_SENT"
+ },
+ {
+ "ArchStdEvent": "FW_FENCE_I_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_SFENCE_VMA_SENT"
+ },
+ {
+ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_GVMA_SENT"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_VVMA_SENT"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT"
+ },
+ {
+ "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json
new file mode 100644
index 000000000000..5eab718c9256
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json
@@ -0,0 +1,92 @@
+[
+ {
+ "EventName": "EXCEPTION_TAKEN",
+ "EventCode": "0x0000100",
+ "BriefDescription": "Exception taken"
+ },
+ {
+ "EventName": "INTEGER_LOAD_RETIRED",
+ "EventCode": "0x0000200",
+ "BriefDescription": "Integer load instruction retired"
+ },
+ {
+ "EventName": "INTEGER_STORE_RETIRED",
+ "EventCode": "0x0000400",
+ "BriefDescription": "Integer store instruction retired"
+ },
+ {
+ "EventName": "ATOMIC_MEMORY_RETIRED",
+ "EventCode": "0x0000800",
+ "BriefDescription": "Atomic memory operation retired"
+ },
+ {
+ "EventName": "SYSTEM_INSTRUCTION_RETIRED",
+ "EventCode": "0x0001000",
+ "BriefDescription": "System instruction retired"
+ },
+ {
+ "EventName": "INTEGER_ARITHMETIC_RETIRED",
+ "EventCode": "0x0002000",
+ "BriefDescription": "Integer arithmetic instruction retired"
+ },
+ {
+ "EventName": "CONDITIONAL_BRANCH_RETIRED",
+ "EventCode": "0x0004000",
+ "BriefDescription": "Conditional branch retired"
+ },
+ {
+ "EventName": "JAL_INSTRUCTION_RETIRED",
+ "EventCode": "0x0008000",
+ "BriefDescription": "JAL instruction retired"
+ },
+ {
+ "EventName": "JALR_INSTRUCTION_RETIRED",
+ "EventCode": "0x0010000",
+ "BriefDescription": "JALR instruction retired"
+ },
+ {
+ "EventName": "INTEGER_MULTIPLICATION_RETIRED",
+ "EventCode": "0x0020000",
+ "BriefDescription": "Integer multiplication instruction retired"
+ },
+ {
+ "EventName": "INTEGER_DIVISION_RETIRED",
+ "EventCode": "0x0040000",
+ "BriefDescription": "Integer division instruction retired"
+ },
+ {
+ "EventName": "FP_LOAD_RETIRED",
+ "EventCode": "0x0080000",
+ "BriefDescription": "Floating-point load instruction retired"
+ },
+ {
+ "EventName": "FP_STORE_RETIRED",
+ "EventCode": "0x0100000",
+ "BriefDescription": "Floating-point store instruction retired"
+ },
+ {
+ "EventName": "FP_ADDITION_RETIRED",
+ "EventCode": "0x0200000",
+ "BriefDescription": "Floating-point addition retired"
+ },
+ {
+ "EventName": "FP_MULTIPLICATION_RETIRED",
+ "EventCode": "0x0400000",
+ "BriefDescription": "Floating-point multiplication retired"
+ },
+ {
+ "EventName": "FP_FUSEDMADD_RETIRED",
+ "EventCode": "0x0800000",
+ "BriefDescription": "Floating-point fused multiply-add retired"
+ },
+ {
+ "EventName": "FP_DIV_SQRT_RETIRED",
+ "EventCode": "0x1000000",
+ "BriefDescription": "Floating-point division or square-root retired"
+ },
+ {
+ "EventName": "OTHER_FP_RETIRED",
+ "EventCode": "0x2000000",
+ "BriefDescription": "Other floating-point instruction retired"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json
new file mode 100644
index 000000000000..be1a46312ac3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json
@@ -0,0 +1,32 @@
+[
+ {
+ "EventName": "ICACHE_RETIRED",
+ "EventCode": "0x0000102",
+ "BriefDescription": "Instruction cache miss"
+ },
+ {
+ "EventName": "DCACHE_MISS_MMIO_ACCESSES",
+ "EventCode": "0x0000202",
+ "BriefDescription": "Data cache miss or memory-mapped I/O access"
+ },
+ {
+ "EventName": "DCACHE_WRITEBACK",
+ "EventCode": "0x0000402",
+ "BriefDescription": "Data cache write-back"
+ },
+ {
+ "EventName": "INST_TLB_MISS",
+ "EventCode": "0x0000802",
+ "BriefDescription": "Instruction TLB miss"
+ },
+ {
+ "EventName": "DATA_TLB_MISS",
+ "EventCode": "0x0001002",
+ "BriefDescription": "Data TLB miss"
+ },
+ {
+ "EventName": "UTLB_MISS",
+ "EventCode": "0x0002002",
+ "BriefDescription": "UTLB miss"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json
new file mode 100644
index 000000000000..50ffa55418cb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json
@@ -0,0 +1,57 @@
+[
+ {
+ "EventName": "ADDRESSGEN_INTERLOCK",
+ "EventCode": "0x0000101",
+ "BriefDescription": "Address-generation interlock"
+ },
+ {
+ "EventName": "LONGLAT_INTERLOCK",
+ "EventCode": "0x0000201",
+ "BriefDescription": "Long-latency interlock"
+ },
+ {
+ "EventName": "CSR_READ_INTERLOCK",
+ "EventCode": "0x0000401",
+ "BriefDescription": "CSR read interlock"
+ },
+ {
+ "EventName": "ICACHE_ITIM_BUSY",
+ "EventCode": "0x0000801",
+ "BriefDescription": "Instruction cache/ITIM busy"
+ },
+ {
+ "EventName": "DCACHE_DTIM_BUSY",
+ "EventCode": "0x0001001",
+ "BriefDescription": "Data cache/DTIM busy"
+ },
+ {
+ "EventName": "BRANCH_DIRECTION_MISPREDICTION",
+ "EventCode": "0x0002001",
+ "BriefDescription": "Branch direction misprediction"
+ },
+ {
+ "EventName": "BRANCH_TARGET_MISPREDICTION",
+ "EventCode": "0x0004001",
+ "BriefDescription": "Branch/jump target misprediction"
+ },
+ {
+ "EventName": "PIPE_FLUSH_CSR_WRITE",
+ "EventCode": "0x0008001",
+ "BriefDescription": "Pipeline flush from CSR write"
+ },
+ {
+ "EventName": "PIPE_FLUSH_OTHER_EVENT",
+ "EventCode": "0x0010001",
+ "BriefDescription": "Pipeline flush from other event"
+ },
+ {
+ "EventName": "INTEGER_MULTIPLICATION_INTERLOCK",
+ "EventCode": "0x0020001",
+ "BriefDescription": "Integer multiplication interlock"
+ },
+ {
+ "EventName": "FP_INTERLOCK",
+ "EventCode": "0x0040001",
+ "BriefDescription": "Floating-point interlock"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index e06d26ad5138..edf440e9359a 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -1287,14 +1287,14 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
"MetricGroup": "HPC;Summary",
"MetricName": "CPU_Utilization",
"Unit": "cpu_core"
},
{
"BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
- "MetricExpr": "Turbo_Utilization * msr@tsc@ / 1000000000 / duration_time",
+ "MetricExpr": "Turbo_Utilization * TSC / 1000000000 / duration_time",
"MetricGroup": "Power;Summary",
"MetricName": "Average_Frequency",
"Unit": "cpu_core"
@@ -1337,19 +1337,26 @@
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1000000 / duration_time / 1000",
+ "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1000000 / duration_time / 1000",
"MetricGroup": "HPC;Mem;MemoryBW;SoC",
"MetricName": "DRAM_BW_Use",
"Unit": "cpu_core"
},
{
"BriefDescription": "Average number of parallel requests to external memory. Accounts for all requests",
- "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / arb@event\\=0x81\\,umask\\=0x1@",
+ "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
"MetricGroup": "Mem;SoC",
"MetricName": "MEM_Parallel_Requests",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricExpr": "UNC_CLOCK.SOCKET",
+ "MetricGroup": "SoC",
+ "MetricName": "Socket_CLKS",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
"MetricGroup": "Branches;OS",
@@ -1357,6 +1364,12 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "Socket_CLKS / #num_dies / duration_time / 1000000000",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ"
+ },
+ {
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
"MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS",
"MetricGroup": "TopdownL1",
@@ -1902,7 +1915,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
"MetricName": "CPU_Utilization",
"Unit": "cpu_atom"
},
@@ -1950,62 +1963,72 @@
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "(cstate_core@c1\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C1_Core_Residency"
+ "MetricName": "C1_Core_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C6_Core_Residency"
+ "MetricName": "C6_Core_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C7_Core_Residency"
+ "MetricName": "C7_Core_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C2_Pkg_Residency"
+ "MetricName": "C2_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C3_Pkg_Residency"
+ "MetricName": "C3_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C6_Pkg_Residency"
+ "MetricName": "C6_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C7_Pkg_Residency"
+ "MetricName": "C7_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "(cstate_pkg@c8\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C8_Pkg_Residency"
+ "MetricName": "C8_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C9 residency percent per package",
- "MetricExpr": "(cstate_pkg@c9\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C9_Pkg_Residency"
+ "MetricName": "C9_Pkg_Residency",
+ "ScaleUnit": "100%"
},
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "(cstate_pkg@c10\\-residency@ / msr@tsc@) * 100",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
"MetricGroup": "Power",
- "MetricName": "C10_Pkg_Residency"
+ "MetricName": "C10_Pkg_Residency",
+ "ScaleUnit": "100%"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
index 2cc62d2779d2..adc9887b8ae0 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
@@ -1,1178 +1,871 @@
[
{
- "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x2e",
- "EventName": "LONGEST_LAT_CACHE.MISS",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x41",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x2e",
- "EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x4f",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.IFETCH",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x38",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x20",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x8",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x10",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.LOAD",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x7",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x34",
- "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of load uops retired that hit in DRAM.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "Data_LA": "1",
- "EventCode": "0xd1",
- "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "UMask": "0x80",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "Data_LA": "1",
- "EventCode": "0xd1",
- "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "Data_LA": "1",
- "EventCode": "0xd1",
- "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x04",
- "EventName": "MEM_SCHEDULER_BLOCK.ALL",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x7",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x04",
- "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x04",
- "EventName": "MEM_SCHEDULER_BLOCK.RSV",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x04",
- "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of load uops retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "UMask": "0x81",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of store uops retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "UMask": "0x82",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x80",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x10",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x100",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x20",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x4",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x200",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x40",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
- "L1_Hit_Indication": "1",
- "MSRIndex": "0x3F6",
- "MSRValue": "0x8",
- "PEBS": "2",
- "PEBScounters": "0,1",
- "SampleAfterValue": "1000003",
- "TakenAlone": "1",
- "UMask": "0x5",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of retired split load uops.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "UMask": "0x41",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "Data_LA": "1",
- "EventCode": "0xd0",
- "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
- "L1_Hit_Indication": "1",
- "PEBS": "2",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "UMask": "0x6",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3F803C0001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10003C0001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x4003C0001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x8003C0001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_RFO.L3_HIT",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3F803C0002",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10003C0002",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.ICACHE",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x20",
- "Unit": "cpu_atom"
- },
- {
"BriefDescription": "L1D.HWPF_MISS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
+ "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
"CounterMask": "1",
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
+ "Deprecated": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALL",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of L1D misses that are outstanding",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles with L1D load Misses outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache lines filling L2",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x1f",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "All accesses to L2 cache[This event is alias to L2_RQSTS.REFERENCES]",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xff",
"Unit": "cpu_core"
},
{
"BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_RQSTS.MISS]",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x3f",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 code requests",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xe4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand Data Read access L2 cache",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xe1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand requests that miss L2 cache",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x27",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2_RQSTS.ALL_HWPF",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xf0",
"Unit": "cpu_core"
},
{
"BriefDescription": "RFO requests to L2 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xe2",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache hits when fetching instructions, code reads.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xc4",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2 cache misses when fetching instructions",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x24",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand Data Read requests that hit L2 cache",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xc1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand Data Read miss L2 cache",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x21",
"Unit": "cpu_core"
},
{
"BriefDescription": "L2_RQSTS.HWPF_MISS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x30",
"Unit": "cpu_core"
},
{
"BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_REQUEST.MISS]",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x3f",
"Unit": "cpu_core"
},
{
"BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xff",
"Unit": "cpu_core"
},
{
"BriefDescription": "RFO requests that hit L2 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xc2",
"Unit": "cpu_core"
},
{
"BriefDescription": "RFO requests that miss L2 cache",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x22",
"Unit": "cpu_core"
},
{
"BriefDescription": "SW prefetch requests that hit L2 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0xc8",
"Unit": "cpu_core"
},
{
"BriefDescription": "SW prefetch requests that miss L2 cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x28",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x41",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4f",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+ "SampleAfterValue": "200003",
+ "UMask": "0x38",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).",
+ "SampleAfterValue": "200003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Retired load instructions.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_LOADS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions of PREFETCHNTA or PREFETCHT0/1/2 or PREFETCHW.",
"SampleAfterValue": "1000003",
"UMask": "0x81",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired store instructions.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_STORES",
- "L1_Hit_Indication": "1",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all retired store instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x82",
"Unit": "cpu_core"
},
{
"BriefDescription": "All retired memory instructions.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ANY",
- "L1_Hit_Indication": "1",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores.",
"SampleAfterValue": "1000003",
"UMask": "0x83",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions with locked access.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.LOCK_LOADS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with locked access.",
"SampleAfterValue": "100007",
"UMask": "0x21",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions that split across a cacheline boundary.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired store instructions that split across a cacheline boundary.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_STORES",
- "L1_Hit_Indication": "1",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
"SampleAfterValue": "100003",
"UMask": "0x42",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions that miss the STLB.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).",
"SampleAfterValue": "100003",
"UMask": "0x11",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired store instructions that miss the STLB.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "L1_Hit_Indication": "1",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).",
"SampleAfterValue": "100003",
"UMask": "0x12",
"Unit": "cpu_core"
},
{
"BriefDescription": "Completed demand load uops that miss the L1 d-cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0xfd",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
"SampleAfterValue": "20011",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
"SampleAfterValue": "20011",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
"SampleAfterValue": "20011",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
"SampleAfterValue": "20011",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd3",
"EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
- "PEBScounters": "0,1,2,3",
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd4",
"EventName": "MEM_LOAD_MISC_RETIRED.UC",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).",
"SampleAfterValue": "100007",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.FB_HIT",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions with L1 cache hits as data sources",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions missed L1 cache as data sources",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_MISS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
"SampleAfterValue": "200003",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions with L2 cache hits as data sources",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_HIT",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions missed L2 cache as data sources",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_MISS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
"SampleAfterValue": "100021",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions with L3 cache hits as data sources",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_HIT",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
"SampleAfterValue": "100021",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired load instructions missed L3 cache as data sources",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_MISS",
"PEBS": "1",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
"SampleAfterValue": "50021",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of load uops retired that hit in DRAM.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+ "SampleAfterValue": "20003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+ "SampleAfterValue": "20003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+ "SampleAfterValue": "20003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+ "SampleAfterValue": "20003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of load uops retired.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of load uops retired.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x81",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of store uops retired.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of store uops retired.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x82",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x80",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x10",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x100",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x20",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x4",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x200",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x40",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x8",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired split load uops.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Retired memory uops for any access",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1182,8 +875,27 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x8003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -1193,8 +905,27 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1205,139 +936,111 @@
},
{
"BriefDescription": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand and prefetch data reads",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Demand Data Read requests sent to uncore",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
+ "Deprecated": "1",
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding Demand RFO request, increments by 1.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of PREFETCHNTA instructions executed.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of PREFETCHW instructions executed.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of PREFETCHT0 instructions executed.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
index 48a4605fc057..3eb7cab9b431 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
@@ -1,165 +1,124 @@
[
{
- "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc3",
- "EventName": "MACHINE_CLEARS.FP_ASSIST",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc2",
- "EventName": "UOPS_RETIRED.FPDIV",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "UMask": "0x8",
- "Unit": "cpu_atom"
- },
- {
"BriefDescription": "ARITH.FPDIV_ACTIVE",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts all microcode FP assists.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "ASSISTS.SSE_AVX_MIX",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+ "SampleAfterValue": "20003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.FPDIV",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
index da1a7ba0e568..250cd128b674 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
@@ -1,536 +1,416 @@
[
{
"BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xe6",
"EventName": "BACLEARS.ANY",
- "PEBScounters": "0,1,2,3,4,5",
+ "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x80",
- "EventName": "ICACHE.ACCESSES",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x3",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of instruction cache misses.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x80",
- "EventName": "ICACHE.MISSES",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
"BriefDescription": "Stalls caused by changing prefix length of the instruction.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles the Microcode Sequencer is busy.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "500009",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "DSB-to-MITE switch true penalty cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired Instructions who experienced DSB miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x1",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired Instructions who experienced iTLB true miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x14",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x12",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x13",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x600106",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
"MSRValue": "0x608006",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
"MSRValue": "0x601006",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
"MSRValue": "0x600206",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
"MSRValue": "0x610006",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x100206",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
"MSRValue": "0x602006",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
"MSRValue": "0x600406",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
"MSRValue": "0x620006",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
"MSRIndex": "0x3F7",
"MSRValue": "0x604006",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
"MSRValue": "0x600806",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "FRONTEND_RETIRED.MS_FLOWS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.MS_FLOWS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.STLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x15",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc6",
"EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"MSRIndex": "0x3F7",
"MSRValue": "0x17",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.",
+ "EventCode": "0x80",
+ "EventName": "ICACHE.ACCESSES",
+ "PublicDescription": "Counts the total number of requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of instruction cache misses.",
+ "EventCode": "0x80",
+ "EventName": "ICACHE.MISSES",
+ "PublicDescription": "Counts the number of missed requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles DSB is delivering optimal number of Uops",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles MITE is delivering any Uop",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles MITE is delivering optimal number of Uops",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of switches from DSB or MITE to the MS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops delivered to IDQ while MS is busy",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
}
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
index f894e4a0212b..7595eb4ab46f 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
@@ -1,339 +1,234 @@
[
{
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "CounterMask": "6",
+ "EventCode": "0xa3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.ANY_AT_RET",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0xff",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.L1_BOUND_AT_RET",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0xf4",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.OTHER_AT_RET",
- "PEBScounters": "0,1,2,3,4,5",
+ "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0xc0",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.PGWALK_AT_RET",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0xa0",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0x05",
"EventName": "LD_HEAD.ST_ADDR_AT_RET",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x84",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "20003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3F84400001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3F84400001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_RFO.L3_MISS",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3F84400002",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x3F84400002",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
- "CounterMask": "6",
- "EventCode": "0xa3",
- "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PEBScounters": "0,1,2,3",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x6",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Number of machine clears due to memory ordering conflicts.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
"BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x5",
"Unit": "cpu_core"
},
{
"BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x9",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "1009",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "20011",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "503",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100007",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "101",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "2003",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
"PEBS": "2",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "50021",
- "TakenAlone": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired memory store access operations. A PDist event for PEBS Store Latency Facility.",
- "CollectPEBSRecord": "2",
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
"PEBS": "2",
+ "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -343,8 +238,27 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -352,5 +266,33 @@
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Demand Data Read requests who miss L3 cache",
+ "EventCode": "0x21",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.",
+ "EventCode": "0x20",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json
index c49d8ce27310..329c611d7cf7 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json
@@ -1,111 +1,66 @@
[
{
- "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.COREWB_M.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10008",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10001",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10002",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xB7",
- "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
- "MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x10800",
- "SampleAfterValue": "100003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
"BriefDescription": "ASSISTS.HARDWARE",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.HARDWARE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "ASSISTS.PAGE_FAULT",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "CORE_POWER.LICENSE_1",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_1",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "CORE_POWER.LICENSE_2",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_2",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "CORE_POWER.LICENSE_3",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_3",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "200003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.COREWB_M.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10008",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -116,7 +71,6 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -126,8 +80,17 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -138,7 +101,16 @@
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xB7",
+ "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10800",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that have any type of response.",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -149,68 +121,52 @@
},
{
"BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x7",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EdgeDetect": "1",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x7",
"Unit": "cpu_core"
},
{
"BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "Deprecated": "1",
"EdgeDetect": "1",
"EventCode": "0xa5",
"EventName": "RS_EMPTY.COUNT",
"Invert": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x7",
"Unit": "cpu_core"
},
{
"BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
"EventCode": "0xa5",
"EventName": "RS_EMPTY.CYCLES",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x7",
"Unit": "cpu_core"
},
{
"BriefDescription": "XQ.FULL_CYCLES",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
}
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
index 1a137f7f8b7e..f46fa7ba168a 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
@@ -1,2168 +1,1634 @@
[
{
+ "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE",
+ "CounterMask": "1",
+ "Deprecated": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
+ "CounterMask": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.DIV_ACTIVE",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE",
+ "CounterMask": "1",
+ "Deprecated": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.FP_DIVIDER_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event counts the cycles the integer divider is busy.",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.IDIV_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE",
+ "CounterMask": "1",
+ "Deprecated": "1",
+ "EventCode": "0xb0",
+ "EventName": "ARITH.INT_DIVIDER_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
+ "EventCode": "0xc1",
+ "EventName": "ASSISTS.ANY",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1b",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
+ "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.",
"SampleAfterValue": "200003",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "All branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PublicDescription": "Counts all branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.CALL",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xf9",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0x7e",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Conditional branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PEBS": "1",
+ "PublicDescription": "Counts conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x11",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Not taken branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfe",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Taken conditional branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xbf",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Far branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "PublicDescription": "Counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x40",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xeb",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Indirect near branch instructions retired (excluding returns)",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x80",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT_CALL",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfb",
"Unit": "cpu_atom"
},
{
"BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.IND_CALL",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfb",
"Unit": "cpu_atom"
},
{
"BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.JCC",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0x7e",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of near CALL branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xf9",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of near RET branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_RETURN",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xf7",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Return instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "PublicDescription": "Counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Taken branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NON_RETURN_IND",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xeb",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of near relative CALL branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.REL_CALL",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfd",
"Unit": "cpu_atom"
},
{
"BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.RETURN",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xf7",
"Unit": "cpu_atom"
},
{
"BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.TAKEN_JCC",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfe",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
+ "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
"SampleAfterValue": "200003",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "All mispredicted branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "SampleAfterValue": "400009",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0x7e",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x11",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x10",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfe",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "number of branch instructions retired that were mispredicted and taken.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xeb",
"Unit": "cpu_atom"
},
{
"BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfb",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Mispredicted indirect CALL retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x2",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.IND_CALL",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfb",
"Unit": "cpu_atom"
},
{
"BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.JCC",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0x7e",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PEBS": "1",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
+ "SampleAfterValue": "400009",
+ "UMask": "0x20",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xeb",
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RET",
+ "PEBS": "1",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.",
+ "SampleAfterValue": "100007",
+ "UMask": "0x8",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RETURN",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xf7",
"Unit": "cpu_atom"
},
{
"BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.TAKEN_JCC",
"PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
"SampleAfterValue": "200003",
"UMask": "0xfe",
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 1",
- "EventName": "CPU_CLK_UNHALTED.CORE",
- "PEBScounters": "33",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of unhalted core clock cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x3c",
- "EventName": "CPU_CLK_UNHALTED.CORE_P",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 2",
- "EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "PEBScounters": "34",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "UMask": "0x3",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x3c",
- "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 1",
- "EventName": "CPU_CLK_UNHALTED.THREAD",
- "PEBScounters": "33",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of unhalted core clock cycles.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x3c",
- "EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 0",
- "EventName": "INST_RETIRED.ANY",
- "PEBS": "1",
- "PEBScounters": "32",
- "SampleAfterValue": "2000003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc0",
- "EventName": "INST_RETIRED.ANY_P",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x03",
- "EventName": "LD_BLOCKS.4K_ALIAS",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x03",
- "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x03",
- "EventName": "LD_BLOCKS.DATA_UNKNOWN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc3",
- "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x8",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of machines clears due to memory renaming.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc3",
- "EventName": "MACHINE_CLEARS.MRN_NUKE",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x80",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc3",
- "EventName": "MACHINE_CLEARS.PAGE_FAULT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x20",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc3",
- "EventName": "MACHINE_CLEARS.SLOW",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x6f",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc3",
- "EventName": "MACHINE_CLEARS.SMC",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "20003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x75",
- "EventName": "SERIALIZATION.NON_C01_MS_SCB",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x73",
- "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x73",
- "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x73",
- "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x3",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x73",
- "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x73",
- "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x74",
- "EventName": "TOPDOWN_BE_BOUND.ALL",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x74",
- "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x74",
- "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x74",
- "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x8",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x74",
- "EventName": "TOPDOWN_BE_BOUND.REGISTER",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x20",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x74",
- "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x40",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x74",
- "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x10",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.ALL",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x40",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.CISC",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.DECODE",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x8",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x8d",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x72",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.ITLB",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x10",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.OTHER",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x80",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x71",
- "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x4",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of consumed retirement slots.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc2",
- "EventName": "TOPDOWN_RETIRING.ALL",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the total number of uops retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc2",
- "EventName": "UOPS_RETIRED.ALL",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of integer divide uops retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc2",
- "EventName": "UOPS_RETIRED.IDIV",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "UMask": "0x10",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc2",
- "EventName": "UOPS_RETIRED.MS",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0xc2",
- "EventName": "UOPS_RETIRED.X87",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "UMask": "0x2",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "CounterMask": "1",
- "EventCode": "0xb0",
- "EventName": "ARITH.DIVIDER_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x9",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "CounterMask": "1",
- "EventCode": "0xb0",
- "EventName": "ARITH.DIV_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x9",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "CounterMask": "1",
- "EventCode": "0xb0",
- "EventName": "ARITH.FP_DIVIDER_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "This event counts the cycles the integer divider is busy.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xb0",
- "EventName": "ARITH.IDIV_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x8",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "CounterMask": "1",
- "EventCode": "0xb0",
- "EventName": "ARITH.INT_DIVIDER_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x8",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc1",
- "EventName": "ASSISTS.ANY",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "100003",
- "Speculative": "1",
- "UMask": "0x1b",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "All branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Conditional branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.COND",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x11",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Not taken branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.COND_NTAKEN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x10",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Taken conditional branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.COND_TAKEN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x1",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Far branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.FAR_BRANCH",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "100007",
- "UMask": "0x40",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Indirect near branch instructions retired (excluding returns)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.INDIRECT",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "100003",
- "UMask": "0x80",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Direct and indirect near call instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.NEAR_CALL",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "100007",
- "UMask": "0x2",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Return instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "100007",
- "UMask": "0x8",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Taken branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc4",
- "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x20",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "All mispredicted branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc5",
- "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Mispredicted conditional branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc5",
- "EventName": "BR_MISP_RETIRED.COND",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x11",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc5",
- "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x10",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "number of branch instructions retired that were mispredicted and taken.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc5",
- "EventName": "BR_MISP_RETIRED.COND_TAKEN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x1",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Mispredicted indirect CALL retired.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc5",
- "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x2",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc5",
- "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "400009",
- "UMask": "0x20",
- "Unit": "cpu_core"
- },
- {
- "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
- "EventCode": "0xc5",
- "EventName": "BR_MISP_RETIRED.RET",
- "PEBS": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
- "SampleAfterValue": "100007",
- "UMask": "0x8",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x70",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+ "EventName": "CPU_CLK_UNHALTED.CORE",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted core clock cycles.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.CORE_P",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "CPU_CLK_UNHALTED.PAUSE",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x3",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Reference cycles when the core is not in halt state.",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "PEBScounters": "34",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Reference cycles when the core is not in halt state.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core cycles when the thread is not in halt state",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
- "PEBScounters": "33",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted core clock cycles.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Thread cycles when thread is not in halt state",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles while memory subsystem has an outstanding load.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0xc",
"Unit": "cpu_core"
},
{
"BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x5",
"Unit": "cpu_core"
},
{
"BriefDescription": "Total execution stalls.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x21",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_core"
},
{
"BriefDescription": "Instruction decoders utilized in a cycle",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
+ "EventName": "INST_RETIRED.ANY",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of instructions retired. Fixed Counter - architectural event",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 0",
"EventName": "INST_RETIRED.ANY",
"PEBS": "1",
- "PEBScounters": "32",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the total number of instructions retired.",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of instructions retired. General Counter - architectural event",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.ANY_P",
"PEBS": "1",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
{
"BriefDescription": "INST_RETIRED.MACRO_FUSED",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PEBScounters": "1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired NOP instructions.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PEBScounters": "1,2,3,4,5,6,7",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Precise instruction retired with PEBS precise-distribution",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 0",
"EventName": "INST_RETIRED.PREC_DIST",
"PEBS": "1",
- "PEBScounters": "32",
+ "PublicDescription": "A version of INST_RETIRED that allows for a precise distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR++) feature to fix bias in how retired instructions get sampled. Use on Fixed Counter 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "INST_RETIRED.REP_ITERATION",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PEBScounters": "1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
- "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_core"
},
{
"BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
- "Speculative": "1",
- "TakenAlone": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "TMA slots where uops got dropped",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "INT_VEC_RETIRED.128BIT",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
"UMask": "0x13",
"Unit": "cpu_core"
},
{
"BriefDescription": "INT_VEC_RETIRED.256BIT",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
"UMask": "0xac",
"Unit": "cpu_core"
},
{
"BriefDescription": "integer ADD, SUB, SAD 128-bit vector instructions.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
"BriefDescription": "integer ADD, SUB, SAD 256-bit vector instructions.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc",
"Unit": "cpu_core"
},
{
"BriefDescription": "INT_VEC_RETIRED.MUL_256",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
"UMask": "0x80",
"Unit": "cpu_core"
},
{
"BriefDescription": "INT_VEC_RETIRED.SHUFFLES",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "INT_VEC_RETIRED.VNNI_128",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "INT_VEC_RETIRED.VNNI_256",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS",
+ "Deprecated": "1",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.4K_ALIAS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "False dependencies in MOB due to partial compare on address.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x88",
"Unit": "cpu_core"
},
{
"BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x82",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of Uops delivered by the LSD.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of machine clears (nukes) of any type.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+ "SampleAfterValue": "20003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machines clears due to memory renaming.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MRN_NUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+ "SampleAfterValue": "20003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SLOW",
+ "SampleAfterValue": "20003",
+ "UMask": "0x6f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Self-modifying code (SMC) detected.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "MISC2_RETIRED.LFENCE",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "400009",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Increments whenever there is an update to the LBR array.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.NON_C01_MS_SCB",
+ "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "TMA slots wasted due to incorrect speculations.",
- "CollectPEBSRecord": "2",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
- "CollectPEBSRecord": "2",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of specualtive operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "10000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
- "CollectPEBSRecord": "2",
- "Counter": "Fixed counter 3",
"EventName": "TOPDOWN.SLOTS",
- "PEBScounters": "35",
+ "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
"SampleAfterValue": "10000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALL",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ALL",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+ "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+ "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.CISC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.DECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8d",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x72",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ITLB",
+ "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.OTHER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the total number of consumed retirement slots.",
+ "EventCode": "0xc2",
+ "EventName": "TOPDOWN_RETIRING.ALL",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "UOPS_DECODED.DEC0_UOPS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PEBScounters": "0,1,2,3",
"SampleAfterValue": "1000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops executed on port 0",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops executed on port 1",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops executed on ports 2, 3 and 10",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops executed on ports 4 and 9",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops executed on ports 5 and 11",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops executed on port 6",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x40",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops executed on ports 7 and 8",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x80",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where at least 1 uop was executed per-thread",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where at least 2 uops were executed per-thread",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where at least 3 uops were executed per-thread",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles where at least 4 uops were executed per-thread",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "Deprecated": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALL_CYCLES",
"Invert": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the number of x87 uops dispatched.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Uops that RAT issues to RS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
- "Speculative": "1",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the total number of uops retired.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.ALL",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles with retired uop(s).",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retired uops except the last uop of each instruction.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of integer divide uops retired.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.IDIV",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.MS",
+ "PEBS": "1",
+ "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "UOPS_RETIRED.MS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "2000003",
- "TakenAlone": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Retirement slots used.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles without actually retired uops.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5,6,7",
"CounterMask": "1",
+ "Deprecated": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALL_CYCLES",
"Invert": "1",
- "PEBScounters": "0,1,2,3,4,5,6,7",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.X87",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json
index d82d6f62a6fb..2ccd9cf96957 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json
@@ -1,221 +1,174 @@
[
{
- "BriefDescription": "Number of clocks",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x01",
- "EventName": "UNC_M_CLOCKTICKS",
+ "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).",
+ "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN",
"PerPkg": "1",
+ "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).",
"Unit": "iMC"
},
{
- "BriefDescription": "Incoming VC0 read request",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x02",
- "EventName": "UNC_M_VC0_REQUESTS_RD",
+ "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+ "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Incoming VC0 write request",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x03",
- "EventName": "UNC_M_VC0_REQUESTS_WR",
+ "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).",
+ "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN",
"PerPkg": "1",
+ "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).",
"Unit": "iMC"
},
{
- "BriefDescription": "Incoming VC1 read request",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x04",
- "EventName": "UNC_M_VC1_REQUESTS_RD",
+ "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+ "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Incoming VC1 write request",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x05",
- "EventName": "UNC_M_VC1_REQUESTS_WR",
+ "BriefDescription": "ACT command for a read request sent to DRAM",
+ "EventCode": "0x24",
+ "EventName": "UNC_M_ACT_COUNT_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Incoming read prefetch request from IA",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x0A",
- "EventName": "UNC_M_PREFETCH_RD",
+ "BriefDescription": "ACT command sent to DRAM",
+ "EventCode": "0x26",
+ "EventName": "UNC_M_ACT_COUNT_TOTAL",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Any Rank at Hot state",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x19",
- "EventName": "UNC_M_DRAM_THERMAL_HOT",
+ "BriefDescription": "ACT command for a write request sent to DRAM",
+ "EventCode": "0x25",
+ "EventName": "UNC_M_ACT_COUNT_WR",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Any Rank at Warm state",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x1A",
- "EventName": "UNC_M_DRAM_THERMAL_WARM",
+ "BriefDescription": "Read CAS command sent to DRAM",
+ "EventCode": "0x22",
+ "EventName": "UNC_M_CAS_COUNT_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "incoming read request page status is Page Hit",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x1C",
- "EventName": "UNC_M_DRAM_PAGE_HIT_RD",
+ "BriefDescription": "Write CAS command sent to DRAM",
+ "EventCode": "0x23",
+ "EventName": "UNC_M_CAS_COUNT_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of clocks",
+ "EventCode": "0x01",
+ "EventName": "UNC_M_CLOCKTICKS",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "incoming read request page status is Page Empty",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
"EventCode": "0x1D",
"EventName": "UNC_M_DRAM_PAGE_EMPTY_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "incoming read request page status is Page Miss",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x1E",
- "EventName": "UNC_M_DRAM_PAGE_MISS_RD",
+ "BriefDescription": "incoming write request page status is Page Empty",
+ "EventCode": "0x20",
+ "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "incoming read request page status is Page Hit",
+ "EventCode": "0x1C",
+ "EventName": "UNC_M_DRAM_PAGE_HIT_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "incoming write request page status is Page Hit",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
"EventCode": "0x1F",
"EventName": "UNC_M_DRAM_PAGE_HIT_WR",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "incoming write request page status is Page Empty",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x20",
- "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR",
+ "BriefDescription": "incoming read request page status is Page Miss",
+ "EventCode": "0x1E",
+ "EventName": "UNC_M_DRAM_PAGE_MISS_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "incoming write request page status is Page Miss",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
"EventCode": "0x21",
"EventName": "UNC_M_DRAM_PAGE_MISS_WR",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Read CAS command sent to DRAM",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x22",
- "EventName": "UNC_M_CAS_COUNT_RD",
- "PerPkg": "1",
- "Unit": "iMC"
- },
- {
- "BriefDescription": "Write CAS command sent to DRAM",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x23",
- "EventName": "UNC_M_CAS_COUNT_WR",
+ "BriefDescription": "Any Rank at Hot state",
+ "EventCode": "0x19",
+ "EventName": "UNC_M_DRAM_THERMAL_HOT",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "ACT command for a read request sent to DRAM",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x24",
- "EventName": "UNC_M_ACT_COUNT_RD",
+ "BriefDescription": "Any Rank at Warm state",
+ "EventCode": "0x1A",
+ "EventName": "UNC_M_DRAM_THERMAL_WARM",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "ACT command for a write request sent to DRAM",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x25",
- "EventName": "UNC_M_ACT_COUNT_WR",
+ "BriefDescription": "Incoming read prefetch request from IA.",
+ "EventCode": "0x0A",
+ "EventName": "UNC_M_PREFETCH_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "ACT command sent to DRAM",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x26",
- "EventName": "UNC_M_ACT_COUNT_TOTAL",
+ "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration",
+ "EventCode": "0x28",
+ "EventName": "UNC_M_PRE_COUNT_IDLE",
"PerPkg": "1",
"Unit": "iMC"
},
{
"BriefDescription": "PRE command sent to DRAM for a read/write request",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
"EventCode": "0x27",
"EventName": "UNC_M_PRE_COUNT_PAGE_MISS",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration",
- "Counter": "0,1,2,3,4",
- "CounterType": "PGMABLE",
- "EventCode": "0x28",
- "EventName": "UNC_M_PRE_COUNT_IDLE",
- "PerPkg": "1",
- "Unit": "iMC"
- },
- {
- "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels)",
- "CounterType": "FREERUN",
- "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN",
+ "BriefDescription": "Incoming VC0 read request",
+ "EventCode": "0x02",
+ "EventName": "UNC_M_VC0_REQUESTS_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels)",
- "Counter": "3",
- "CounterType": "FREERUN",
- "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN",
+ "BriefDescription": "Incoming VC0 write request",
+ "EventCode": "0x03",
+ "EventName": "UNC_M_VC0_REQUESTS_WR",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM",
- "Counter": "1",
- "CounterType": "FREERUN",
- "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN",
+ "BriefDescription": "Incoming VC1 read request",
+ "EventCode": "0x04",
+ "EventName": "UNC_M_VC1_REQUESTS_RD",
"PerPkg": "1",
"Unit": "iMC"
},
{
- "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM",
- "Counter": "4",
- "CounterType": "FREERUN",
- "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN",
+ "BriefDescription": "Incoming VC1 write request",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_VC1_REQUESTS_WR",
"PerPkg": "1",
"Unit": "iMC"
}
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
index b1ae349f5f21..bc5fb6b76065 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
@@ -1,40 +1,73 @@
[
{
- "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
- "Counter": "Fixed",
- "CounterType": "PGMABLE",
- "EventCode": "0xff",
- "EventName": "UNC_CLOCK.SOCKET",
+ "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+ "EventCode": "0x84",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
"PerPkg": "1",
- "Unit": "CLOCK"
+ "UMask": "0x1",
+ "Unit": "ARB"
},
{
- "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC",
- "Counter": "0,1",
- "CounterType": "PGMABLE",
+ "BriefDescription": "Each cycle counts number of any coherent request at memory controller that were issued by any core.",
+ "EventCode": "0x85",
+ "EventName": "UNC_ARB_DAT_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle counts number of coherent reads pending on data return from memory controller that were issued by any core.",
+ "EventCode": "0x85",
+ "EventName": "UNC_ARB_DAT_OCCUPANCY.RD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Number of coherent read requests sent to memory controller that were issued by any core.",
"EventCode": "0x81",
- "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "EventName": "UNC_ARB_DAT_REQUESTS.RD",
"PerPkg": "1",
- "UMask": "0x01",
+ "UMask": "0x2",
"Unit": "ARB"
},
{
- "BriefDescription": "Number of requests allocated in Coherency Tracker",
- "Counter": "0,1",
- "CounterType": "PGMABLE",
- "EventCode": "0x84",
- "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_OCCUPANCY.ALL",
+ "EventCode": "0x85",
+ "EventName": "UNC_ARB_IFA_OCCUPANCY.ALL",
"PerPkg": "1",
- "UMask": "0x01",
+ "UMask": "0x1",
"Unit": "ARB"
},
{
- "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic",
- "CounterType": "PGMABLE",
+ "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_REQUESTS.RD",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.",
"EventCode": "0x80",
"EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
"PerPkg": "1",
- "UMask": "0x01",
+ "UMask": "0x1",
"Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "EventCode": "0xff",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "PerPkg": "1",
+ "Unit": "CLOCK"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
index 12baf768ad8d..3827d292da80 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
@@ -1,317 +1,236 @@
[
{
- "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x08",
- "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0xe",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x49",
- "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "UMask": "0xe",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x85",
- "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x1",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x85",
- "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "2000003",
- "Speculative": "1",
- "UMask": "0x80",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x85",
- "EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "200003",
- "Speculative": "1",
- "UMask": "0xe",
- "Unit": "cpu_atom"
- },
- {
- "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3,4,5",
- "EventCode": "0x05",
- "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
- "PEBScounters": "0,1,2,3,4,5",
- "SampleAfterValue": "1000003",
- "Speculative": "1",
- "UMask": "0x90",
- "Unit": "cpu_atom"
- },
- {
"BriefDescription": "Loads that miss the DTLB and hit the STLB.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_core"
},
{
"BriefDescription": "Page walks completed due to a demand data load to a 1G page.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
"BriefDescription": "Stores that miss the DTLB and hit the STLB.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_core"
},
{
"BriefDescription": "Page walks completed due to a demand data store to a 1G page.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
"BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
"BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0xe",
"Unit": "cpu_core"
},
{
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x4",
"Unit": "cpu_core"
},
{
"BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x2",
"Unit": "cpu_core"
},
{
"BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
- "CollectPEBSRecord": "2",
- "Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PEBScounters": "0,1,2,3",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
- "Speculative": "1",
"UMask": "0x10",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x90",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
new file mode 100644
index 000000000000..c57e9f325fb0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -0,0 +1,583 @@
+[
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "tma_frontend_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / SLOTS",
+ "MetricGroup": "TopdownL2;tma_frontend_bound_group",
+ "MetricName": "tma_frontend_latency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+ "MetricName": "tma_icache",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+ "MetricName": "tma_itlb",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
+ "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+ "MetricName": "tma_branch_detect",
+ "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+ "MetricName": "tma_branch_resteer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / SLOTS",
+ "MetricGroup": "TopdownL2;tma_frontend_bound_group",
+ "MetricName": "tma_frontend_bandwidth",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).",
+ "MetricExpr": "TOPDOWN_FE_BOUND.CISC / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+ "MetricName": "tma_cisc",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+ "MetricName": "tma_decode",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+ "MetricName": "tma_predecode",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.",
+ "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / SLOTS",
+ "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+ "MetricName": "tma_other_fb",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear",
+ "MetricExpr": "(SLOTS - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / SLOTS",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "tma_bad_speculation",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / SLOTS",
+ "MetricGroup": "TopdownL2;tma_bad_speculation_group",
+ "MetricName": "tma_branch_mispredicts",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / SLOTS",
+ "MetricGroup": "TopdownL2;tma_bad_speculation_group",
+ "MetricName": "tma_machine_clears",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / SLOTS",
+ "MetricGroup": "TopdownL3;tma_machine_clears_group",
+ "MetricName": "tma_nuke",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC. ",
+ "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)",
+ "MetricGroup": "TopdownL4;tma_nuke_group",
+ "MetricName": "tma_smc",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering. ",
+ "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)",
+ "MetricGroup": "TopdownL4;tma_nuke_group",
+ "MetricName": "tma_memory_ordering",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists. ",
+ "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)",
+ "MetricGroup": "TopdownL4;tma_nuke_group",
+ "MetricName": "tma_fp_assist",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation. ",
+ "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)",
+ "MetricGroup": "TopdownL4;tma_nuke_group",
+ "MetricName": "tma_disambiguation",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults. ",
+ "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)",
+ "MetricGroup": "TopdownL4;tma_nuke_group",
+ "MetricName": "tma_page_fault",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
+ "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / SLOTS",
+ "MetricGroup": "TopdownL3;tma_machine_clears_group",
+ "MetricName": "tma_fast_nuke",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
+ "MetricExpr": "TOPDOWN_BE_BOUND.ALL / SLOTS",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "tma_backend_bound",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles due to backend bound stalls that are core execution bound and not attributed to outstanding demand load or store stalls. ",
+ "MetricExpr": "max(0, tma_backend_bound - tma_load_store_bound)",
+ "MetricGroup": "TopdownL2;tma_backend_bound_group",
+ "MetricName": "tma_core_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads. ",
+ "MetricExpr": "min((TOPDOWN_BE_BOUND.ALL / SLOTS), (LD_HEAD.ANY_AT_RET / CLKS) + tma_store_bound)",
+ "MetricGroup": "TopdownL2;tma_backend_bound_group",
+ "MetricName": "tma_load_store_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.",
+ "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)",
+ "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+ "MetricName": "tma_store_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
+ "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / CLKS",
+ "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+ "MetricName": "tma_l1_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+ "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / CLKS",
+ "MetricGroup": "TopdownL4;tma_l1_bound_group",
+ "MetricName": "tma_store_fwd",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
+ "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / CLKS",
+ "MetricGroup": "TopdownL4;tma_l1_bound_group",
+ "MetricName": "tma_stlb_hit",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
+ "MetricExpr": "LD_HEAD.PGWALK_AT_RET / CLKS",
+ "MetricGroup": "TopdownL4;tma_l1_bound_group",
+ "MetricName": "tma_stlb_miss",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
+ "MetricExpr": "LD_HEAD.OTHER_AT_RET / CLKS",
+ "MetricGroup": "TopdownL4;tma_l1_bound_group",
+ "MetricName": "tma_other_l1",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
+ "MetricExpr": "(MEM_BOUND_STALLS.LOAD_L2_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD)",
+ "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+ "MetricName": "tma_l2_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+ "MetricExpr": "(MEM_BOUND_STALLS.LOAD_LLC_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD)",
+ "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+ "MetricName": "tma_l3_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
+ "MetricExpr": "(MEM_BOUND_STALLS.LOAD_DRAM_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD)",
+ "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+ "MetricName": "tma_dram_bound",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hits in the L2, LLC, DRAM or MMIO (Non-DRAM) but could not be correctly attributed or cycles in which the load miss is waiting on a request buffer.",
+ "MetricExpr": "max(0, tma_load_store_bound - (tma_store_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_dram_bound))",
+ "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+ "MetricName": "tma_other_load_store",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
+ "MetricExpr": "tma_backend_bound",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "tma_backend_bound_aux",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation. ",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls",
+ "MetricExpr": "tma_backend_bound",
+ "MetricGroup": "TopdownL2;tma_backend_bound_aux_group",
+ "MetricName": "tma_resource_bound",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. ",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
+ "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / SLOTS",
+ "MetricGroup": "TopdownL3;tma_resource_bound_group",
+ "MetricName": "tma_mem_scheduler",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to store buffer full",
+ "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)",
+ "MetricGroup": "TopdownL4;tma_mem_scheduler_group",
+ "MetricName": "tma_st_buffer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full",
+ "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL",
+ "MetricGroup": "TopdownL4;tma_mem_scheduler_group",
+ "MetricName": "tma_ld_buffer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative ",
+ "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL",
+ "MetricGroup": "TopdownL4;tma_mem_scheduler_group",
+ "MetricName": "tma_rsv",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
+ "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / SLOTS",
+ "MetricGroup": "TopdownL3;tma_resource_bound_group",
+ "MetricName": "tma_non_mem_scheduler",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
+ "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / SLOTS",
+ "MetricGroup": "TopdownL3;tma_resource_bound_group",
+ "MetricName": "tma_register",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
+ "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / SLOTS",
+ "MetricGroup": "TopdownL3;tma_resource_bound_group",
+ "MetricName": "tma_reorder_buffer",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.",
+ "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / SLOTS",
+ "MetricGroup": "TopdownL3;tma_resource_bound_group",
+ "MetricName": "tma_alloc_restriction",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
+ "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / SLOTS",
+ "MetricGroup": "TopdownL3;tma_resource_bound_group",
+ "MetricName": "tma_serialization",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the numer of issue slots that result in retirement slots. ",
+ "MetricExpr": "TOPDOWN_RETIRING.ALL / SLOTS",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "tma_retiring",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of uops that are not from the microsequencer. ",
+ "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / SLOTS",
+ "MetricGroup": "TopdownL2;tma_retiring_group",
+ "MetricName": "tma_base",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point operations per uop with all default weighting.",
+ "MetricExpr": "UOPS_RETIRED.FPDIV / SLOTS",
+ "MetricGroup": "TopdownL3;tma_base_group",
+ "MetricName": "tma_fp_uops",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.",
+ "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / SLOTS",
+ "MetricGroup": "TopdownL3;tma_base_group",
+ "MetricName": "tma_other_ret",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
+ "MetricExpr": "UOPS_RETIRED.MS / SLOTS",
+ "MetricGroup": "TopdownL2;tma_retiring_group",
+ "MetricName": "tma_ms_uops",
+ "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "",
+ "MetricExpr": "CPU_CLK_UNHALTED.CORE",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "",
+ "MetricExpr": "CPU_CLK_UNHALTED.CORE_P",
+ "MetricName": "CLKS_P"
+ },
+ {
+ "BriefDescription": "",
+ "MetricExpr": "5 * CLKS",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle",
+ "MetricExpr": "INST_RETIRED.ANY / CLKS",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction",
+ "MetricExpr": "CLKS / INST_RETIRED.ANY",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
+ "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "Store_Fwd_Blocks"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block",
+ "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "Address_Alias_Blocks"
+ },
+ {
+ "BriefDescription": "Percentage of total non-speculative loads that are splits",
+ "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "Load_Splits"
+ },
+ {
+ "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricName": "IpBranch"
+ },
+ {
+ "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
+ "MetricName": "IpCall"
+ },
+ {
+ "BriefDescription": "Instructions per Load",
+ "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
+ "MetricName": "IpLoad"
+ },
+ {
+ "BriefDescription": "Instructions per Store",
+ "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
+ "MetricName": "IpStore"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction",
+ "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricName": "IpMispredict"
+ },
+ {
+ "BriefDescription": "Instructions per Far Branch",
+ "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)",
+ "MetricName": "IpFarBranch"
+ },
+ {
+ "BriefDescription": "Ratio of all branches which mispredict",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricName": "Branch_Mispredict_Ratio"
+ },
+ {
+ "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
+ "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+ "MetricName": "Branch_Mispredict_to_Unknown_Branch_Ratio"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are ucode ops",
+ "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
+ "MetricName": "Microcode_Uop_Ratio"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are FPDiv uops",
+ "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
+ "MetricName": "FPDiv_Uop_Ratio"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are IDiv uops",
+ "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
+ "MetricName": "IDiv_Uop_Ratio"
+ },
+ {
+ "BriefDescription": "Percentage of all uops which are x87 uops",
+ "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL",
+ "MetricName": "X87_Uop_Ratio"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CLKS / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Cycle cost per L2 hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "MetricName": "Cycles_per_Demand_Load_L2_Hit"
+ },
+ {
+ "BriefDescription": "Cycle cost per LLC hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "MetricName": "Cycles_per_Demand_Load_L3_Hit"
+ },
+ {
+ "BriefDescription": "Cycle cost per DRAM hit",
+ "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "MetricName": "Cycles_per_Demand_Load_DRAM_Hit"
+ },
+ {
+ "BriefDescription": "Percent of instruction miss cost that hit in the L2",
+ "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / (MEM_BOUND_STALLS.IFETCH)",
+ "MetricName": "Inst_Miss_Cost_L2Hit_Percent"
+ },
+ {
+ "BriefDescription": "Percent of instruction miss cost that hit in the L3",
+ "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / (MEM_BOUND_STALLS.IFETCH)",
+ "MetricName": "Inst_Miss_Cost_L3Hit_Percent"
+ },
+ {
+ "BriefDescription": "Percent of instruction miss cost that hit in DRAM",
+ "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / (MEM_BOUND_STALLS.IFETCH)",
+ "MetricName": "Inst_Miss_Cost_DRAMHit_Percent"
+ },
+ {
+ "BriefDescription": "load ops retired per 1000 instruction",
+ "MetricExpr": "1000 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+ "MetricName": "MemLoadPKI"
+ },
+ {
+ "BriefDescription": "C1 residency percent per core",
+ "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C1_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C8 residency percent per package",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C8_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C9 residency percent per package",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C9_Pkg_Residency",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "C10 residency percent per package",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricGroup": "Power",
+ "MetricName": "C10_Pkg_Residency",
+ "ScaleUnit": "100%"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
new file mode 100644
index 000000000000..043445ae14a8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
@@ -0,0 +1,330 @@
+[
+ {
+ "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41"
+ },
+ {
+ "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4f"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+ "SampleAfterValue": "200003",
+ "UMask": "0x38"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).",
+ "SampleAfterValue": "200003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
+ "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of load uops retired that hit in DRAM.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+ "SampleAfterValue": "20003",
+ "UMask": "0x7"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+ "SampleAfterValue": "20003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+ "SampleAfterValue": "20003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+ "EventCode": "0x04",
+ "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+ "SampleAfterValue": "20003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of load uops retired.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of load uops retired.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x81"
+ },
+ {
+ "BriefDescription": "Counts the number of store uops retired.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of store uops retired.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x82"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x80",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x10",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x100",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x20",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x4",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x200",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x40",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "MSRValue": "0x8",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5"
+ },
+ {
+ "BriefDescription": "Counts the number of retired split load uops.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41"
+ },
+ {
+ "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
+ "PEBS": "2",
+ "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x6"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x4003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x8003C0001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10003C0002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json
new file mode 100644
index 000000000000..30e8ca3c1485
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json
@@ -0,0 +1,18 @@
+[
+ {
+ "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+ "SampleAfterValue": "20003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.FPDIV",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x8"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json
new file mode 100644
index 000000000000..36898bab2bba
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json
@@ -0,0 +1,26 @@
+[
+ {
+ "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.",
+ "EventCode": "0x80",
+ "EventName": "ICACHE.ACCESSES",
+ "PublicDescription": "Counts the total number of requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "Counts the number of instruction cache misses.",
+ "EventCode": "0x80",
+ "EventName": "ICACHE.MISSES",
+ "PublicDescription": "Counts the number of missed requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json
new file mode 100644
index 000000000000..f84bf8c43495
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json
@@ -0,0 +1,81 @@
+[
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ANY_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.L1_BOUND_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xf4"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.OTHER_AT_RET",
+ "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc0"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.PGWALK_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xa0"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ST_ADDR_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x84"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "20003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F84400002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/other.json b/tools/perf/pmu-events/arch/x86/alderlaken/other.json
new file mode 100644
index 000000000000..6336de61f628
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/other.json
@@ -0,0 +1,38 @@
+[
+ {
+ "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.COREWB_M.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10008",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand data reads that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10001",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10002",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts streaming stores that have any type of response.",
+ "EventCode": "0xB7",
+ "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x10800",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
new file mode 100644
index 000000000000..fa53ff11a509
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
@@ -0,0 +1,533 @@
+[
+ {
+ "BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.",
+ "SampleAfterValue": "200003"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf9"
+ },
+ {
+ "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e"
+ },
+ {
+ "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe"
+ },
+ {
+ "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xbf"
+ },
+ {
+ "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb"
+ },
+ {
+ "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.JCC",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e"
+ },
+ {
+ "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf9"
+ },
+ {
+ "BriefDescription": "Counts the number of near RET branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf7"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb"
+ },
+ {
+ "BriefDescription": "Counts the number of near relative CALL branch instructions retired.",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.REL_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfd"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.RETURN",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf7"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN",
+ "Deprecated": "1",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe"
+ },
+ {
+ "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
+ "SampleAfterValue": "200003"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.IND_CALL",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.JCC",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7e"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xeb"
+ },
+ {
+ "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.RETURN",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xf7"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN",
+ "Deprecated": "1",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+ "PEBS": "1",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfe"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+ "EventName": "CPU_CLK_UNHALTED.CORE",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted core clock cycles.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.CORE_P",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted core clock cycles.",
+ "EventCode": "0x3c",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003"
+ },
+ {
+ "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
+ "EventName": "INST_RETIRED.ANY",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the total number of instructions retired.",
+ "EventCode": "0xc0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "PEBS": "1",
+ "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.",
+ "SampleAfterValue": "2000003"
+ },
+ {
+ "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS",
+ "Deprecated": "1",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.4K_ALIAS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+ "SampleAfterValue": "20003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts the number of machines clears due to memory renaming.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MRN_NUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+ "SampleAfterValue": "20003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SLOW",
+ "SampleAfterValue": "20003",
+ "UMask": "0x6f"
+ },
+ {
+ "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.NON_C01_MS_SCB",
+ "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
+ "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "SampleAfterValue": "1000003"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x3"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+ "EventCode": "0x73",
+ "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALL",
+ "SampleAfterValue": "1000003"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ALL",
+ "SampleAfterValue": "1000003"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+ "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+ "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.CISC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.DECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8d"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x72"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ITLB",
+ "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.OTHER",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Counts the total number of consumed retirement slots.",
+ "EventCode": "0xc2",
+ "EventName": "TOPDOWN_RETIRING.ALL",
+ "PEBS": "1",
+ "SampleAfterValue": "1000003"
+ },
+ {
+ "BriefDescription": "Counts the total number of uops retired.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.ALL",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003"
+ },
+ {
+ "BriefDescription": "Counts the number of integer divide uops retired.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.IDIV",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.MS",
+ "PEBS": "1",
+ "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.X87",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json
new file mode 100644
index 000000000000..2ccd9cf96957
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json
@@ -0,0 +1,175 @@
+[
+ {
+ "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).",
+ "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+ "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).",
+ "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+ "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "ACT command for a read request sent to DRAM",
+ "EventCode": "0x24",
+ "EventName": "UNC_M_ACT_COUNT_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "ACT command sent to DRAM",
+ "EventCode": "0x26",
+ "EventName": "UNC_M_ACT_COUNT_TOTAL",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "ACT command for a write request sent to DRAM",
+ "EventCode": "0x25",
+ "EventName": "UNC_M_ACT_COUNT_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read CAS command sent to DRAM",
+ "EventCode": "0x22",
+ "EventName": "UNC_M_CAS_COUNT_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write CAS command sent to DRAM",
+ "EventCode": "0x23",
+ "EventName": "UNC_M_CAS_COUNT_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of clocks",
+ "EventCode": "0x01",
+ "EventName": "UNC_M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "incoming read request page status is Page Empty",
+ "EventCode": "0x1D",
+ "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "incoming write request page status is Page Empty",
+ "EventCode": "0x20",
+ "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "incoming read request page status is Page Hit",
+ "EventCode": "0x1C",
+ "EventName": "UNC_M_DRAM_PAGE_HIT_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "incoming write request page status is Page Hit",
+ "EventCode": "0x1F",
+ "EventName": "UNC_M_DRAM_PAGE_HIT_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "incoming read request page status is Page Miss",
+ "EventCode": "0x1E",
+ "EventName": "UNC_M_DRAM_PAGE_MISS_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "incoming write request page status is Page Miss",
+ "EventCode": "0x21",
+ "EventName": "UNC_M_DRAM_PAGE_MISS_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Any Rank at Hot state",
+ "EventCode": "0x19",
+ "EventName": "UNC_M_DRAM_THERMAL_HOT",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Any Rank at Warm state",
+ "EventCode": "0x1A",
+ "EventName": "UNC_M_DRAM_THERMAL_WARM",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Incoming read prefetch request from IA.",
+ "EventCode": "0x0A",
+ "EventName": "UNC_M_PREFETCH_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration",
+ "EventCode": "0x28",
+ "EventName": "UNC_M_PRE_COUNT_IDLE",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "PRE command sent to DRAM for a read/write request",
+ "EventCode": "0x27",
+ "EventName": "UNC_M_PRE_COUNT_PAGE_MISS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Incoming VC0 read request",
+ "EventCode": "0x02",
+ "EventName": "UNC_M_VC0_REQUESTS_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Incoming VC0 write request",
+ "EventCode": "0x03",
+ "EventName": "UNC_M_VC0_REQUESTS_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Incoming VC1 read request",
+ "EventCode": "0x04",
+ "EventName": "UNC_M_VC1_REQUESTS_RD",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Incoming VC1 write request",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_VC1_REQUESTS_WR",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json
new file mode 100644
index 000000000000..f9e7777cd2be
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json
@@ -0,0 +1,33 @@
+[
+ {
+ "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+ "EventCode": "0x84",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.",
+ "EventCode": "0x80",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "EventCode": "0x81",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "ARB"
+ },
+ {
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "EventCode": "0xff",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "PerPkg": "1",
+ "Unit": "CLOCK"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json
new file mode 100644
index 000000000000..67fd640f790e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json
@@ -0,0 +1,47 @@
+[
+ {
+ "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80"
+ },
+ {
+ "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x90"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 5e609b876790..df47462a125f 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -1,5 +1,6 @@
Family-model,Version,Filename,EventType
-GenuineIntel-6-(97|9A|B7|BA|BE|BF),v1.15,alderlake,core
+GenuineIntel-6-(97|9A|B7|BA|BF),v1.16,alderlake,core
+GenuineIntel-6-BE,v1.16,alderlaken,core
GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core
GenuineIntel-6-(3D|47),v26,broadwell,core
GenuineIntel-6-56,v23,broadwellde,core
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
index 6be7fd8fd615..08862a2582f4 100644
--- a/tools/perf/scripts/python/intel-pt-events.py
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -13,10 +13,12 @@
from __future__ import print_function
+import io
import os
import sys
import struct
import argparse
+import contextlib
from libxed import LibXED
from ctypes import create_string_buffer, addressof
@@ -39,6 +41,11 @@ glb_src = False
glb_source_file_name = None
glb_line_number = None
glb_dso = None
+glb_stash_dict = {}
+glb_output = None
+glb_output_pos = 0
+glb_cpu = -1
+glb_time = 0
def get_optional_null(perf_dict, field):
if field in perf_dict:
@@ -70,6 +77,7 @@ def trace_begin():
ap.add_argument("--insn-trace", action='store_true')
ap.add_argument("--src-trace", action='store_true')
ap.add_argument("--all-switch-events", action='store_true')
+ ap.add_argument("--interleave", type=int, nargs='?', const=4, default=0)
global glb_args
global glb_insn
global glb_src
@@ -94,11 +102,39 @@ def trace_begin():
perf_set_itrace_options(perf_script_context, itrace)
def trace_end():
+ if glb_args.interleave:
+ flush_stashed_output()
print("End")
def trace_unhandled(event_name, context, event_fields_dict):
print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]))
+def stash_output():
+ global glb_stash_dict
+ global glb_output_pos
+ output_str = glb_output.getvalue()[glb_output_pos:]
+ n = len(output_str)
+ if n:
+ glb_output_pos += n
+ if glb_cpu not in glb_stash_dict:
+ glb_stash_dict[glb_cpu] = []
+ glb_stash_dict[glb_cpu].append(output_str)
+
+def flush_stashed_output():
+ global glb_stash_dict
+ while glb_stash_dict:
+ cpus = list(glb_stash_dict.keys())
+ # Output at most glb_args.interleave output strings per cpu
+ for cpu in cpus:
+ items = glb_stash_dict[cpu]
+ countdown = glb_args.interleave
+ while len(items) and countdown:
+ sys.stdout.write(items[0])
+ del items[0]
+ countdown -= 1
+ if not items:
+ del glb_stash_dict[cpu]
+
def print_ptwrite(raw_buf):
data = struct.unpack_from("<IQ", raw_buf)
flags = data[0]
@@ -375,15 +411,40 @@ def do_process_event(param_dict):
print_common_start(comm, sample, name)
print_common_ip(param_dict, sample, symbol, dso)
+def interleave_events(param_dict):
+ global glb_cpu
+ global glb_time
+ global glb_output
+ global glb_output_pos
+
+ sample = param_dict["sample"]
+ glb_cpu = sample["cpu"]
+ ts = sample["time"]
+
+ if glb_time != ts:
+ glb_time = ts
+ flush_stashed_output()
+
+ glb_output_pos = 0
+ with contextlib.redirect_stdout(io.StringIO()) as glb_output:
+ do_process_event(param_dict)
+
+ stash_output()
+
def process_event(param_dict):
try:
- do_process_event(param_dict)
+ if glb_args.interleave:
+ interleave_events(param_dict)
+ else:
+ do_process_event(param_dict)
except broken_pipe_exception:
# Stop python printing broken pipe errors and traceback
sys.stdout = open(os.devnull, 'w')
sys.exit(1)
def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
+ if glb_args.interleave:
+ flush_stashed_output()
if len(x) >= 2 and x[0]:
machine_pid = x[0]
vcpu = x[1]
@@ -403,6 +464,8 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
sys.exit(1)
def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x):
+ if glb_args.interleave:
+ flush_stashed_output()
if out:
out_str = "Switch out "
else:
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 2064a640facb..11b69023011b 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -103,3 +103,5 @@ endif
CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls
+
+perf-y += workloads/
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 7122eae1d98d..4c6ae59a4dfd 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -118,6 +118,15 @@ static struct test_suite **tests[] = {
arch_tests,
};
+static struct test_workload *workloads[] = {
+ &workload__noploop,
+ &workload__thloop,
+ &workload__leafloop,
+ &workload__sqrtloop,
+ &workload__brstack,
+ &workload__datasym,
+};
+
static int num_subtests(const struct test_suite *t)
{
int num;
@@ -475,6 +484,21 @@ static int perf_test__list(int argc, const char **argv)
return 0;
}
+static int run_workload(const char *work, int argc, const char **argv)
+{
+ unsigned int i = 0;
+ struct test_workload *twl;
+
+ for (i = 0; i < ARRAY_SIZE(workloads); i++) {
+ twl = workloads[i];
+ if (!strcmp(twl->name, work))
+ return twl->func(argc, argv);
+ }
+
+ pr_info("No workload found: %s\n", work);
+ return -1;
+}
+
int cmd_test(int argc, const char **argv)
{
const char *test_usage[] = {
@@ -482,12 +506,14 @@ int cmd_test(int argc, const char **argv)
NULL,
};
const char *skip = NULL;
+ const char *workload = NULL;
const struct option test_options[] = {
OPT_STRING('s', "skip", &skip, "tests", "tests to skip"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('F', "dont-fork", &dont_fork,
"Do not fork for testcase"),
+ OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"),
OPT_END()
};
const char * const test_subcommands[] = { "list", NULL };
@@ -504,6 +530,9 @@ int cmd_test(int argc, const char **argv)
if (argc >= 1 && !strcmp(argv[0], "list"))
return perf_test__list(argc - 1, argv + 1);
+ if (workload)
+ return run_workload(workload, argc, argv);
+
symbol_conf.priv_size = sizeof(int);
symbol_conf.sort_by_name = true;
symbol_conf.try_vmlinux_path = true;
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 7c873c6ae3eb..3150fc1fed6f 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -6,7 +6,7 @@
#include "util/synthetic-events.h"
#include <string.h>
#include <linux/bitops.h>
-#include <perf/cpumap.h>
+#include <internal/cpumap.h>
#include "debug.h"
struct machine;
diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
index 84352d55347d..99aa72e425e4 100644
--- a/tools/perf/tests/dlfilter-test.c
+++ b/tools/perf/tests/dlfilter-test.c
@@ -33,6 +33,7 @@
#include "archinsn.h"
#include "dlfilter.h"
#include "tests.h"
+#include "util/sample.h"
#define MAP_START 0x400000
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 6512f5e22045..b6667501ebb4 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -2,6 +2,7 @@
#include "util/cputopo.h"
#include "util/debug.h"
#include "util/expr.h"
+#include "util/hashmap.h"
#include "util/header.h"
#include "util/smt.h"
#include "tests.h"
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 8322fc2295fa..e68ca6229756 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -5,11 +5,13 @@
#include <perf/cpumap.h>
#include "debug.h"
+#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include "tests.h"
#include "util/mmap.h"
+#include "util/sample.h"
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index a7b2800652e4..888df8eca981 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -14,6 +14,7 @@
#include "util/mmap.h"
#include <errno.h>
#include <perf/mmap.h>
+#include "util/sample.h"
#ifndef O_DIRECTORY
#define O_DIRECTORY 00200000
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 7e05b8b5cc95..131b62271bfa 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <unistd.h>
#include "thread_map.h"
#include "evsel.h"
#include "debug.h"
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 459afdb256a1..3440dd2616b0 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -2237,6 +2237,19 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest
pr_debug("Test PMU event failed for '%s'", name);
ret = combine_test_results(ret, test_ret);
}
+ /*
+ * Names containing '-' are recognized as prefixes and suffixes
+ * due to '-' being a legacy PMU separator. This fails when the
+ * prefix or suffix collides with an existing legacy token. For
+ * example, branch-brs has a prefix (branch) that collides with
+ * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix
+ * isn't expected after this. As event names in the config
+ * slashes are allowed a '-' in the name we check this works
+ * above.
+ */
+ if (strchr(ent->d_name, '-'))
+ continue;
+
snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name);
e.name = name;
e.check = test__checkevent_pmu_events_mix;
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index 68f5a2a03242..21b7ac00d798 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -103,7 +103,7 @@ static int __compute_metric(const char *name, struct value *vals,
if (err)
goto out;
- err = evlist__alloc_stats(evlist, false);
+ err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false);
if (err)
goto out;
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index d62e31595ab2..202f0a9a6796 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -8,6 +8,7 @@
#include "evlist.h"
#include "header.h"
#include "debug.h"
+#include "util/sample.h"
static int process_event(struct evlist **pevlist, union perf_event *event)
{
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 7aa946aa886d..1c4feec1adff 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -5,12 +5,14 @@
#include <sched.h>
#include <perf/mmap.h>
+#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "record.h"
#include "tests.h"
#include "util/mmap.h"
+#include "util/sample.h"
static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
{
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index c3aaa1ddff29..efcd71c2738a 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -20,6 +20,7 @@
#include "tsc.h"
#include "mmap.h"
#include "tests.h"
+#include "util/sample.h"
/*
* Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index 097e05c796ab..f7b9dbbad97f 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -12,6 +12,7 @@
#include <perf/evlist.h>
#include "util/evlist.h"
#include "util/expr.h"
+#include "util/hashmap.h"
#include "util/parse-events.h"
#include "metricgroup.h"
#include "stat.h"
@@ -889,7 +890,7 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e
goto out_err;
}
- err = evlist__alloc_stats(evlist, false);
+ err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false);
if (err)
goto out_err;
/*
diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh
index 04bf604e3c6f..f7bd0d8eb5c3 100755
--- a/tools/perf/tests/shell/lock_contention.sh
+++ b/tools/perf/tests/shell/lock_contention.sh
@@ -53,7 +53,7 @@ test_bpf()
if ! perf lock con -b true > /dev/null 2>&1 ; then
echo "[Skip] No BPF support"
- exit
+ return
fi
# the perf lock contention output goes to the stderr
@@ -65,9 +65,22 @@ test_bpf()
fi
}
+test_record_concurrent()
+{
+ echo "Testing perf lock record and perf lock contention at the same time"
+ perf lock record -o- -- perf bench sched messaging 2> /dev/null | \
+ perf lock contention -i- -E 1 -q 2> ${result}
+ if [ $(cat "${result}" | wc -l) != "1" ]; then
+ echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+ err=1
+ exit
+ fi
+}
+
check
test_record
test_bpf
+test_record_concurrent
exit ${err}
diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh
index 1b32b4f28391..8dd115dd35a7 100755
--- a/tools/perf/tests/shell/pipe_test.sh
+++ b/tools/perf/tests/shell/pipe_test.sh
@@ -2,68 +2,33 @@
# perf pipe recording and injection test
# SPDX-License-Identifier: GPL-2.0
-# skip if there's no compiler
-if ! [ -x "$(command -v cc)" ]; then
- echo "failed: no compiler, install gcc"
- exit 2
-fi
-
-file=$(mktemp /tmp/test.file.XXXXXX)
data=$(mktemp /tmp/perf.data.XXXXXX)
+prog="perf test -w noploop"
+task="perf"
+sym="noploop"
-cat <<EOF | cc -o ${file} -x c -
-#include <signal.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-volatile int done;
-
-void sigalrm(int sig) {
- done = 1;
-}
-
-__attribute__((noinline)) void noploop(void) {
- while (!done)
- continue;
-}
-
-int main(int argc, char *argv[]) {
- int sec = 1;
-
- if (argc > 1)
- sec = atoi(argv[1]);
-
- signal(SIGALRM, sigalrm);
- alarm(sec);
-
- noploop();
- return 0;
-}
-EOF
-
-
-if ! perf record -e task-clock:u -o - ${file} | perf report -i - --task | grep test.file; then
+if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then
echo "cannot find the test file in the perf report"
exit 1
fi
-if ! perf record -e task-clock:u -o - ${file} | perf inject -b | perf report -i - | grep noploop; then
+if ! perf record -e task-clock:u -o - ${prog} | perf inject -b | perf report -i - | grep ${sym}; then
echo "cannot find noploop function in pipe #1"
exit 1
fi
-perf record -e task-clock:u -o - ${file} | perf inject -b -o ${data}
-if ! perf report -i ${data} | grep noploop; then
+perf record -e task-clock:u -o - ${prog} | perf inject -b -o ${data}
+if ! perf report -i ${data} | grep ${sym}; then
echo "cannot find noploop function in pipe #2"
exit 1
fi
-perf record -e task-clock:u -o ${data} ${file}
-if ! perf inject -b -i ${data} | perf report -i - | grep noploop; then
+perf record -e task-clock:u -o ${data} ${prog}
+if ! perf inject -b -i ${data} | perf report -i - | grep ${sym}; then
echo "cannot find noploop function in pipe #3"
exit 1
fi
-rm -f ${file} ${data} ${data}.old
+rm -f ${data} ${data}.old
exit 0
diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 301f95427159..4fbc74805d52 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -4,67 +4,89 @@
set -e
+shelldir=$(dirname "$0")
+. "${shelldir}"/lib/waiting.sh
+
err=0
perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+testprog="perf test -w thloop"
+testsym="test_loop"
cleanup() {
- rm -f ${perfdata}
- rm -f ${perfdata}.old
- trap - exit term int
+ rm -rf "${perfdata}"
+ rm -rf "${perfdata}".old
+
+ trap - EXIT TERM INT
}
trap_cleanup() {
cleanup
exit 1
}
-trap trap_cleanup exit term int
+trap trap_cleanup EXIT TERM INT
test_per_thread() {
echo "Basic --per-thread mode test"
- if ! perf record -e instructions:u -o ${perfdata} --quiet true 2> /dev/null
+ if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null
then
- echo "Per-thread record [Skipped instructions:u not supported]"
- if [ $err -ne 1 ]
- then
- err=2
- fi
+ echo "Per-thread record [Skipped event not supported]"
return
fi
- if ! perf record -e instructions:u --per-thread -o ${perfdata} true 2> /dev/null
+ if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null
then
- echo "Per-thread record of instructions:u [Failed]"
+ echo "Per-thread record [Failed record]"
err=1
return
fi
- if ! perf report -i ${perfdata} -q | egrep -q true
+ if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
then
echo "Per-thread record [Failed missing output]"
err=1
return
fi
+
+ # run the test program in background (for 30 seconds)
+ ${testprog} 30 &
+ TESTPID=$!
+
+ rm -f "${perfdata}"
+
+ wait_for_threads ${TESTPID} 2
+ perf record -p "${TESTPID}" --per-thread -o "${perfdata}" sleep 1 2> /dev/null
+ kill ${TESTPID}
+
+ if [ ! -e "${perfdata}" ]
+ then
+ echo "Per-thread record [Failed record -p]"
+ err=1
+ return
+ fi
+ if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+ then
+ echo "Per-thread record [Failed -p missing output]"
+ err=1
+ return
+ fi
+
echo "Basic --per-thread mode test [Success]"
}
test_register_capture() {
echo "Register capture test"
- if ! perf list | egrep -q 'br_inst_retired.near_call'
+ if ! perf list | grep -q 'br_inst_retired.near_call'
then
- echo "Register capture test [Skipped missing instruction]"
- if [ $err -ne 1 ]
- then
- err=2
- fi
+ echo "Register capture test [Skipped missing event]"
return
fi
- if ! perf record --intr-regs=\? 2>&1 | egrep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
+ if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
then
echo "Register capture test [Skipped missing registers]"
return
fi
- if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
- -c 1000 --per-thread true 2> /dev/null \
+ if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call \
+ -c 1000 --per-thread ${testprog} 2> /dev/null \
| perf script -F ip,sym,iregs -i - 2> /dev/null \
- | egrep -q "DI:"
+ | grep -q "DI:"
then
echo "Register capture test [Failed missing output]"
err=1
@@ -73,8 +95,69 @@ test_register_capture() {
echo "Register capture test [Success]"
}
+test_system_wide() {
+ echo "Basic --system-wide mode test"
+ if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null
+ then
+ echo "System-wide record [Skipped not supported]"
+ return
+ fi
+ if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+ then
+ echo "System-wide record [Failed missing output]"
+ err=1
+ return
+ fi
+ if ! perf record -aB --synth=no -e cpu-clock,cs --threads=cpu \
+ -o "${perfdata}" ${testprog} 2> /dev/null
+ then
+ echo "System-wide record [Failed record --threads option]"
+ err=1
+ return
+ fi
+ if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+ then
+ echo "System-wide record [Failed --threads missing output]"
+ err=1
+ return
+ fi
+ echo "Basic --system-wide mode test [Success]"
+}
+
+test_workload() {
+ echo "Basic target workload test"
+ if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null
+ then
+ echo "Workload record [Failed record]"
+ err=1
+ return
+ fi
+ if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+ then
+ echo "Workload record [Failed missing output]"
+ err=1
+ return
+ fi
+ if ! perf record -e cpu-clock,cs --threads=package \
+ -o "${perfdata}" ${testprog} 2> /dev/null
+ then
+ echo "Workload record [Failed record --threads option]"
+ err=1
+ return
+ fi
+ if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+ then
+ echo "Workload record [Failed --threads missing output]"
+ err=1
+ return
+ fi
+ echo "Basic target workload test [Success]"
+}
+
test_per_thread
test_register_capture
+test_system_wide
+test_workload
cleanup
exit $err
diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
index ec108d45d3c6..e61d8deaa0c4 100755
--- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh
+++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
@@ -4,44 +4,16 @@
lscpu | grep -q "aarch64" || exit 2
-if ! [ -x "$(command -v cc)" ]; then
- echo "failed: no compiler, install gcc"
- exit 2
-fi
-
PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
-TEST_PROGRAM_SOURCE=$(mktemp /tmp/test_program.XXXXX.c)
-TEST_PROGRAM=$(mktemp /tmp/test_program.XXXXX)
+TEST_PROGRAM="perf test -w leafloop"
cleanup_files()
{
rm -f $PERF_DATA
- rm -f $TEST_PROGRAM_SOURCE
- rm -f $TEST_PROGRAM
}
trap cleanup_files exit term int
-cat << EOF > $TEST_PROGRAM_SOURCE
-int a = 0;
-void leaf(void) {
- for (;;)
- a += a;
-}
-void parent(void) {
- leaf();
-}
-int main(void) {
- parent();
- return 0;
-}
-EOF
-
-echo " + Compiling test program ($TEST_PROGRAM)..."
-
-CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer"
-cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1
-
# Add a 1 second delay to skip samples that are not in the leaf() function
perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null &
PID=$!
@@ -58,11 +30,11 @@ wait $PID
# program
# 728 leaf
# 753 parent
-# 76c main
+# 76c leafloop
# ...
perf script -i $PERF_DATA -F comm,ip,sym | head -n4
perf script -i $PERF_DATA -F comm,ip,sym | head -n4 | \
awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" ||
sym[1] != "parent" ||
- sym[2] != "main") exit 1 }'
+ sym[2] != "leafloop") exit 1 }'
diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh
index c920d3583d30..da810e1b2b9e 100755
--- a/tools/perf/tests/shell/test_arm_spe_fork.sh
+++ b/tools/perf/tests/shell/test_arm_spe_fork.sh
@@ -11,14 +11,7 @@ skip_if_no_arm_spe_event() {
skip_if_no_arm_spe_event || exit 2
-# skip if there's no compiler
-if ! [ -x "$(command -v cc)" ]; then
- echo "failed: no compiler, install gcc"
- exit 2
-fi
-
-TEST_PROGRAM_SOURCE=$(mktemp /tmp/__perf_test.program.XXXXX.c)
-TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX)
+TEST_PROGRAM="perf test -w sqrtloop 10"
PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
PERF_RECORD_LOG=$(mktemp /tmp/__perf_test.log.XXXXX)
@@ -27,43 +20,10 @@ cleanup_files()
echo "Cleaning up files..."
rm -f ${PERF_RECORD_LOG}
rm -f ${PERF_DATA}
- rm -f ${TEST_PROGRAM_SOURCE}
- rm -f ${TEST_PROGRAM}
}
trap cleanup_files exit term int
-# compile test program
-cat << EOF > $TEST_PROGRAM_SOURCE
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/wait.h>
-
-int workload() {
- while (1)
- sqrt(rand());
- return 0;
-}
-
-int main() {
- switch (fork()) {
- case 0:
- return workload();
- case -1:
- return 1;
- default:
- wait(NULL);
- }
- return 0;
-}
-EOF
-
-echo "Compiling test program..."
-CFLAGS="-lm"
-cc $TEST_PROGRAM_SOURCE $CFLAGS -o $TEST_PROGRAM || exit 1
-
echo "Recording workload..."
perf record -o ${PERF_DATA} -e arm_spe/period=65536/ -vvv -- $TEST_PROGRAM > ${PERF_RECORD_LOG} 2>&1 &
PERFPID=$!
@@ -78,8 +38,6 @@ echo Log lines after 1 second = $log1
kill $PERFPID
wait $PERFPID
-# test program may leave an orphan process running the workload
-killall $(basename $TEST_PROGRAM)
if [ "$log0" = "$log1" ];
then
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index d7ff5c4b4da4..5856639b565b 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -4,13 +4,6 @@
# SPDX-License-Identifier: GPL-2.0
# German Gomez <german.gomez@arm.com>, 2022
-# we need a C compiler to build the test programs
-# so bail if none is found
-if ! [ -x "$(command -v cc)" ]; then
- echo "failed: no compiler, install gcc"
- exit 2
-fi
-
# skip the test if the hardware doesn't support branch stack sampling
# and if the architecture doesn't support filter types: any,save_type,u
if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then
@@ -19,6 +12,7 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev
fi
TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)
+TESTPROG="perf test -w brstack"
cleanup() {
rm -rf $TMPDIR
@@ -26,57 +20,24 @@ cleanup() {
trap cleanup exit term int
-gen_test_program() {
- # generate test program
- cat << EOF > $1
-#define BENCH_RUNS 999999
-int cnt;
-void bar(void) {
-} /* return */
-void foo(void) {
- bar(); /* call */
-} /* return */
-void bench(void) {
- void (*foo_ind)(void) = foo;
- if ((cnt++) % 3) /* branch (cond) */
- foo(); /* call */
- bar(); /* call */
- foo_ind(); /* call (ind) */
-}
-int main(void)
-{
- int cnt = 0;
- while (1) {
- if ((cnt++) > BENCH_RUNS)
- break;
- bench(); /* call */
- } /* branch (uncond) */
- return 0;
-}
-EOF
-}
-
test_user_branches() {
echo "Testing user branch stack sampling"
- gen_test_program "$TEMPDIR/program.c"
- cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out
-
- perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1
+ perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1
perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script
# example of branch entries:
- # foo+0x14/bar+0x40/P/-/-/0/CALL
+ # brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL
set -x
- egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script
- egrep -m1 "^foo\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script
- egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/CALL$" $TMPDIR/perf.script
- egrep -m1 "^bench\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script
- egrep -m1 "^bar\+[^ ]*/foo\+[^ ]*/RET$" $TMPDIR/perf.script
- egrep -m1 "^foo\+[^ ]*/bench\+[^ ]*/RET$" $TMPDIR/perf.script
- egrep -m1 "^bench\+[^ ]*/bench\+[^ ]*/COND$" $TMPDIR/perf.script
- egrep -m1 "^main\+[^ ]*/main\+[^ ]*/UNCOND$" $TMPDIR/perf.script
+ egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script
+ egrep -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script
+ egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$" $TMPDIR/perf.script
+ egrep -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$" $TMPDIR/perf.script
+ egrep -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$" $TMPDIR/perf.script
+ egrep -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$" $TMPDIR/perf.script
+ egrep -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$" $TMPDIR/perf.script
+ egrep -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$" $TMPDIR/perf.script
set +x
# some branch types are still not being tested:
@@ -91,10 +52,7 @@ test_filter() {
echo "Testing branch stack filtering permutation ($filter,$expect)"
- gen_test_program "$TEMPDIR/program.c"
- cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out
-
- perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1
+ perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1
perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script
# fail if we find any branch type that doesn't match any of the expected ones
diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh
index cd6eb54d235d..d871e6c743ef 100755
--- a/tools/perf/tests/shell/test_data_symbol.sh
+++ b/tools/perf/tests/shell/test_data_symbol.sh
@@ -11,13 +11,7 @@ skip_if_no_mem_event() {
skip_if_no_mem_event || exit 2
-# skip if there's no compiler
-if ! [ -x "$(command -v cc)" ]; then
- echo "skip: no compiler, install gcc"
- exit 2
-fi
-
-TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX)
+TEST_PROGRAM="perf test -w datasym"
PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
check_result() {
@@ -45,31 +39,10 @@ cleanup_files()
{
echo "Cleaning up files..."
rm -f ${PERF_DATA}
- rm -f ${TEST_PROGRAM}
}
trap cleanup_files exit term int
-# compile test program
-echo "Compiling test program..."
-cat << EOF | cc -o ${TEST_PROGRAM} -x c -
-typedef struct _buf {
- char data1;
- char reserved[55];
- char data2;
-} buf __attribute__((aligned(64)));
-
-static buf buf1;
-
-int main(void) {
- for (;;) {
- buf1.data1++;
- buf1.data2 += buf1.data1;
- }
- return 0;
-}
-EOF
-
echo "Recording workload..."
# perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 9cd6fec375ee..4d7493fa0105 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -13,6 +13,7 @@
#include "util/evlist.h"
#include "util/cpumap.h"
#include "util/mmap.h"
+#include "util/sample.h"
#include "util/thread_map.h"
#include <perf/evlist.h>
#include <perf/mmap.h>
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 87f565c7f650..b3bd14b025a8 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -19,6 +19,7 @@
#include "record.h"
#include "tests.h"
#include "util/mmap.h"
+#include "util/sample.h"
#include "pmu.h"
static int spin_sleep(void)
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 5bbb8f6a48fc..e15f24cfc909 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -180,4 +180,31 @@ int test__arch_unwind_sample(struct perf_sample *sample,
DECLARE_SUITE(vectors_page);
#endif
+/*
+ * Define test workloads to be used in test suites.
+ */
+typedef int (*workload_fnptr)(int argc, const char **argv);
+
+struct test_workload {
+ const char *name;
+ workload_fnptr func;
+};
+
+#define DECLARE_WORKLOAD(work) \
+ extern struct test_workload workload__##work
+
+#define DEFINE_WORKLOAD(work) \
+struct test_workload workload__##work = { \
+ .name = #work, \
+ .func = work, \
+}
+
+/* The list of test workloads */
+DECLARE_WORKLOAD(noploop);
+DECLARE_WORKLOAD(thloop);
+DECLARE_WORKLOAD(leafloop);
+DECLARE_WORKLOAD(sqrtloop);
+DECLARE_WORKLOAD(brstack);
+DECLARE_WORKLOAD(datasym);
+
#endif /* TESTS_H */
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index e413c1387fcb..74308c1368fe 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -11,6 +11,7 @@
#include "util/synthetic-events.h"
#include <linux/zalloc.h>
#include <perf/event.h>
+#include <internal/threadmap.h>
struct perf_sample;
struct perf_tool;
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
new file mode 100644
index 000000000000..a1f34d5861e3
--- /dev/null
+++ b/tools/perf/tests/workloads/Build
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+perf-y += noploop.o
+perf-y += thloop.o
+perf-y += leafloop.o
+perf-y += sqrtloop.o
+perf-y += brstack.o
+perf-y += datasym.o
+
+CFLAGS_sqrtloop.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
+CFLAGS_leafloop.o = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
+CFLAGS_brstack.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
+CFLAGS_datasym.o = -g -O0 -fno-inline -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/brstack.c b/tools/perf/tests/workloads/brstack.c
new file mode 100644
index 000000000000..0b60bd37b9d1
--- /dev/null
+++ b/tools/perf/tests/workloads/brstack.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+#include "../tests.h"
+
+#define BENCH_RUNS 999999
+
+static volatile int cnt;
+
+static void brstack_bar(void) {
+} /* return */
+
+static void brstack_foo(void) {
+ brstack_bar(); /* call */
+} /* return */
+
+static void brstack_bench(void) {
+ void (*brstack_foo_ind)(void) = brstack_foo;
+
+ if ((cnt++) % 3) /* branch (cond) */
+ brstack_foo(); /* call */
+ brstack_bar(); /* call */
+ brstack_foo_ind(); /* call (ind) */
+}
+
+static int brstack(int argc, const char **argv)
+{
+ int num_loops = BENCH_RUNS;
+
+ if (argc > 0)
+ num_loops = atoi(argv[0]);
+
+ while (1) {
+ if ((cnt++) > num_loops)
+ break;
+ brstack_bench();/* call */
+ } /* branch (uncond) */
+ return 0;
+}
+
+DEFINE_WORKLOAD(brstack);
diff --git a/tools/perf/tests/workloads/datasym.c b/tools/perf/tests/workloads/datasym.c
new file mode 100644
index 000000000000..ddd40bc63448
--- /dev/null
+++ b/tools/perf/tests/workloads/datasym.c
@@ -0,0 +1,24 @@
+#include <linux/compiler.h>
+#include "../tests.h"
+
+typedef struct _buf {
+ char data1;
+ char reserved[55];
+ char data2;
+} buf __attribute__((aligned(64)));
+
+static buf buf1 = {
+ /* to have this in the data section */
+ .reserved[0] = 1,
+};
+
+static int datasym(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+ for (;;) {
+ buf1.data1++;
+ buf1.data2 += buf1.data1;
+ }
+ return 0;
+}
+
+DEFINE_WORKLOAD(datasym);
diff --git a/tools/perf/tests/workloads/leafloop.c b/tools/perf/tests/workloads/leafloop.c
new file mode 100644
index 000000000000..1bf5cc97649b
--- /dev/null
+++ b/tools/perf/tests/workloads/leafloop.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include "../tests.h"
+
+/* We want to check these symbols in perf script */
+noinline void leaf(volatile int b);
+noinline void parent(volatile int b);
+
+static volatile int a;
+
+noinline void leaf(volatile int b)
+{
+ for (;;)
+ a += b;
+}
+
+noinline void parent(volatile int b)
+{
+ leaf(b);
+}
+
+static int leafloop(int argc, const char **argv)
+{
+ int c = 1;
+
+ if (argc > 0)
+ c = atoi(argv[0]);
+
+ parent(c);
+ return 0;
+}
+
+DEFINE_WORKLOAD(leafloop);
diff --git a/tools/perf/tests/workloads/noploop.c b/tools/perf/tests/workloads/noploop.c
new file mode 100644
index 000000000000..940ea5910a84
--- /dev/null
+++ b/tools/perf/tests/workloads/noploop.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+#include "../tests.h"
+
+static volatile sig_atomic_t done;
+
+static void sighandler(int sig __maybe_unused)
+{
+ done = 1;
+}
+
+static int noploop(int argc, const char **argv)
+{
+ int sec = 1;
+
+ if (argc > 0)
+ sec = atoi(argv[0]);
+
+ signal(SIGINT, sighandler);
+ signal(SIGALRM, sighandler);
+ alarm(sec);
+
+ while (!done)
+ continue;
+
+ return 0;
+}
+
+DEFINE_WORKLOAD(noploop);
diff --git a/tools/perf/tests/workloads/sqrtloop.c b/tools/perf/tests/workloads/sqrtloop.c
new file mode 100644
index 000000000000..ccc94c6a6676
--- /dev/null
+++ b/tools/perf/tests/workloads/sqrtloop.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <math.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+#include <sys/wait.h>
+#include "../tests.h"
+
+static volatile sig_atomic_t done;
+
+static void sighandler(int sig __maybe_unused)
+{
+ done = 1;
+}
+
+static int __sqrtloop(int sec)
+{
+ signal(SIGALRM, sighandler);
+ alarm(sec);
+
+ while (!done)
+ (void)sqrt(rand());
+ return 0;
+}
+
+static int sqrtloop(int argc, const char **argv)
+{
+ int sec = 1;
+
+ if (argc > 0)
+ sec = atoi(argv[0]);
+
+ switch (fork()) {
+ case 0:
+ return __sqrtloop(sec);
+ case -1:
+ return -1;
+ default:
+ wait(NULL);
+ }
+ return 0;
+}
+
+DEFINE_WORKLOAD(sqrtloop);
diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c
new file mode 100644
index 000000000000..29193b75717e
--- /dev/null
+++ b/tools/perf/tests/workloads/thloop.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <pthread.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+#include "../tests.h"
+
+static volatile sig_atomic_t done;
+static volatile unsigned count;
+
+/* We want to check this symbol in perf report */
+noinline void test_loop(void);
+
+static void sighandler(int sig __maybe_unused)
+{
+ done = 1;
+}
+
+noinline void test_loop(void)
+{
+ while (!done)
+ count++;
+}
+
+static void *thfunc(void *arg)
+{
+ void (*loop_fn)(void) = arg;
+
+ loop_fn();
+ return NULL;
+}
+
+static int thloop(int argc, const char **argv)
+{
+ int sec = 1;
+ pthread_t th;
+
+ if (argc > 0)
+ sec = atoi(argv[0]);
+
+ signal(SIGINT, sighandler);
+ signal(SIGALRM, sighandler);
+ alarm(sec);
+
+ pthread_create(&th, NULL, thfunc, test_loop);
+ test_loop();
+ pthread_join(th, NULL);
+
+ return 0;
+}
+
+DEFINE_WORKLOAD(thloop);
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
index 56455da30341..cc8719609b19 100644
--- a/tools/perf/tests/wp.c
+++ b/tools/perf/tests/wp.c
@@ -59,8 +59,10 @@ static int __event(int wp_type, void *wp_addr, unsigned long wp_len)
get__perf_event_attr(&attr, wp_type, wp_addr, wp_len);
fd = sys_perf_event_open(&attr, 0, -1, -1,
perf_event_open_cloexec_flag());
- if (fd < 0)
+ if (fd < 0) {
+ fd = -errno;
pr_debug("failed opening event %x\n", attr.bp_type);
+ }
return fd;
}
@@ -77,7 +79,7 @@ static int test__wp_ro(struct test_suite *test __maybe_unused,
fd = __event(HW_BREAKPOINT_R, (void *)&data1, sizeof(data1));
if (fd < 0)
- return -1;
+ return fd == -ENODEV ? TEST_SKIP : -1;
tmp = data1;
WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1);
@@ -101,7 +103,7 @@ static int test__wp_wo(struct test_suite *test __maybe_unused,
fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
if (fd < 0)
- return -1;
+ return fd == -ENODEV ? TEST_SKIP : -1;
tmp = data1;
WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 0);
@@ -126,7 +128,7 @@ static int test__wp_rw(struct test_suite *test __maybe_unused,
fd = __event(HW_BREAKPOINT_R | HW_BREAKPOINT_W, (void *)&data1,
sizeof(data1));
if (fd < 0)
- return -1;
+ return fd == -ENODEV ? TEST_SKIP : -1;
tmp = data1;
WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 1);
@@ -150,7 +152,7 @@ static int test__wp_modify(struct test_suite *test __maybe_unused, int subtest _
fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
if (fd < 0)
- return -1;
+ return fd == -ENODEV ? TEST_SKIP : -1;
data1 = tmp;
WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1);
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index 433dc39053a7..d11ce256f511 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -17,4 +17,5 @@ perf-y += sockaddr.o
perf-y += socket.o
perf-y += statx.o
perf-y += sync_file_range.o
+perf-y += timespec.o
perf-y += tracepoints/
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index f527a46ab4e7..4c59edddd6a8 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -244,6 +244,9 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a
size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg);
#define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags
+size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_TIMESPEC syscall_arg__scnprintf_timespec
+
size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix);
void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
index 11d47dbe63bd..01ee15fe9d0c 100644
--- a/tools/perf/trace/beauty/perf_event_open.c
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -44,3 +44,47 @@ static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
}
#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
+
+struct attr_fprintf_args {
+ size_t size, printed;
+ char *bf;
+ bool first;
+};
+
+static int attr__fprintf(FILE *fp __maybe_unused, const char *name, const char *val, void *priv)
+{
+ struct attr_fprintf_args *args = priv;
+ size_t printed = scnprintf(args->bf + args->printed , args->size - args->printed, "%s%s: %s", args->first ? "" : ", ", name, val);
+
+ args->first = false;
+ args->printed += printed;
+ return printed;
+}
+
+static size_t perf_event_attr___scnprintf(struct perf_event_attr *attr, char *bf, size_t size, bool show_zeros __maybe_unused)
+{
+ struct attr_fprintf_args args = {
+ .printed = scnprintf(bf, size, "{ "),
+ .size = size,
+ .first = true,
+ .bf = bf,
+ };
+
+ perf_event_attr__fprintf(stdout, attr, attr__fprintf, &args);
+ return args.printed + scnprintf(bf + args.printed, size - args.printed, " }");
+}
+
+static size_t syscall_arg__scnprintf_augmented_perf_event_attr(struct syscall_arg *arg, char *bf, size_t size)
+{
+ return perf_event_attr___scnprintf((void *)arg->augmented.args, bf, size, arg->trace->show_zeros);
+}
+
+static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, struct syscall_arg *arg)
+{
+ if (arg->augmented.args)
+ return syscall_arg__scnprintf_augmented_perf_event_attr(arg, bf, size);
+
+ return scnprintf(bf, size, "%#lx", arg->val);
+}
+
+#define SCA_PERF_ATTR syscall_arg__scnprintf_perf_event_attr
diff --git a/tools/perf/trace/beauty/timespec.c b/tools/perf/trace/beauty/timespec.c
new file mode 100644
index 000000000000..e1a61f092aad
--- /dev/null
+++ b/tools/perf/trace/beauty/timespec.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2022, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+
+#include "trace/beauty/beauty.h"
+#include <inttypes.h>
+#include <time.h>
+
+static size_t syscall_arg__scnprintf_augmented_timespec(struct syscall_arg *arg, char *bf, size_t size)
+{
+ struct timespec *ts = (struct timespec *)arg->augmented.args;
+
+ return scnprintf(bf, size, "{ .tv_sec: %" PRIu64 ", .tv_nsec: %" PRIu64 " }", ts->tv_sec, ts->tv_nsec);
+}
+
+size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg)
+{
+ if (arg->augmented.args)
+ return syscall_arg__scnprintf_augmented_timespec(arg, bf, size);
+
+ return scnprintf(bf, size, "%#lx", arg->val);
+}
diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c
index 689b27c34246..1d38ddf01b60 100644
--- a/tools/perf/ui/util.c
+++ b/tools/perf/ui/util.c
@@ -15,6 +15,9 @@ static int perf_stdio__error(const char *format, va_list args)
static int perf_stdio__warning(const char *format, va_list args)
{
+ if (quiet)
+ return 0;
+
fprintf(stderr, "Warning:\n");
vfprintf(stderr, format, args);
return 0;
@@ -45,6 +48,8 @@ int ui__warning(const char *format, ...)
{
int ret;
va_list args;
+ if (quiet)
+ return 0;
va_start(args, format);
ret = perf_eops->warning(format, args);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e315ecaec323..ab37f588ee8b 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -19,7 +19,6 @@ perf-y += perf_event_attr_fprintf.o
perf-y += evswitch.o
perf-y += find_bit.o
perf-y += get_current_dir_name.o
-perf-y += kallsyms.o
perf-y += levenshtein.o
perf-y += llvm-utils.o
perf-y += mmap.o
@@ -220,7 +219,7 @@ perf-$(CONFIG_CXX) += c++/
perf-$(CONFIG_LIBPFM4) += pfm.o
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
-CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
+CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))"
# avoid compiler warnings in 32-bit mode
CFLAGS_genelf_debug.o += -Wno-packed
@@ -294,10 +293,6 @@ CFLAGS_expr.o += -Wno-redundant-decls
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/
-$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
- $(call rule_mkdir)
- $(call if_changed_dep,cc_o_c)
-
$(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
index 238305868644..b0e70ce9d87a 100644
--- a/tools/perf/util/amd-sample-raw.c
+++ b/tools/perf/util/amd-sample-raw.c
@@ -16,6 +16,7 @@
#include "evlist.h"
#include "sample-raw.h"
#include "pmu-events/pmu-events.h"
+#include "util/sample.h"
static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
static bool zen4_ibs_extensions;
diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
index 32af9ce94398..42d3a45490f5 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.h
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -2,8 +2,10 @@
#ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
#define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
-#include "event.h"
-#include "thread.h"
+#include <linux/types.h>
+
+struct perf_sample;
+struct thread;
u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 46ada5ec3f9a..265d20cc126b 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -59,6 +59,7 @@
#include <linux/ctype.h>
#include "symbol/kallsyms.h"
#include <internal/lib.h>
+#include "util/sample.h"
/*
* Make a group from 'leader' to 'last', requiring that the events were not
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 6a0f9b98f059..2cf63d377831 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -15,7 +15,7 @@
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/types.h>
-#include <internal/cpumap.h>
+#include <perf/cpumap.h>
#include <asm/bitsperlong.h>
#include <asm/barrier.h>
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f4adeccdbbcb..b3c8174360bf 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -27,11 +27,7 @@
#include "util.h"
#include "llvm-utils.h"
#include "c++/clang-c.h"
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
#include "util/hashmap.h"
-#endif
#include "asm/bug.h"
#include <internal/xyarray.h>
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
index c50c7358009f..66dcf751ef65 100644
--- a/tools/perf/util/bpf-prologue.h
+++ b/tools/perf/util/bpf-prologue.h
@@ -6,9 +6,8 @@
#ifndef __BPF_PROLOGUE_H
#define __BPF_PROLOGUE_H
-#include <linux/compiler.h>
-#include <linux/filter.h>
-#include "probe-event.h"
+struct probe_trace_arg;
+struct bpf_insn;
#define BPF_PROLOGUE_MAX_ARGS 3
#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
@@ -19,6 +18,7 @@ int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
struct bpf_insn *new_prog, size_t *new_cnt,
size_t cnt_space);
#else
+#include <linux/compiler.h>
#include <errno.h>
static inline int
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index ef1c15e4aeba..eeee899fcf34 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -561,7 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
if (filter_type == BPERF_FILTER_PID ||
filter_type == BPERF_FILTER_TGID)
- key = evsel->core.threads->map[i].pid;
+ key = perf_thread_map__pid(evsel->core.threads, i);
else if (filter_type == BPERF_FILTER_CPU)
key = evsel->core.cpus->map[i].cpu;
else
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index b629dd679d3f..6eb2c78fd7f4 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -7,15 +7,18 @@
#include <time.h>
#include <fcntl.h>
+#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/time64.h>
#include "util/debug.h"
+#include "util/evsel.h"
#include "util/kwork.h"
#include <bpf/bpf.h>
+#include <perf/cpumap.h>
#include "util/bpf_skel/kwork_trace.skel.h"
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index fc4d613cb979..4db9ad3d50c4 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -39,6 +39,7 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
+ bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
if (target__has_cpu(target))
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
@@ -108,28 +109,36 @@ int lock_contention_stop(void)
int lock_contention_read(struct lock_contention *con)
{
- int fd, stack;
+ int fd, stack, err = 0;
s32 prev_key, key;
- struct lock_contention_data data;
- struct lock_stat *st;
+ struct lock_contention_data data = {};
+ struct lock_stat *st = NULL;
struct machine *machine = con->machine;
- u64 stack_trace[con->max_stack];
+ u64 *stack_trace;
+ size_t stack_size = con->max_stack * sizeof(*stack_trace);
fd = bpf_map__fd(skel->maps.lock_stat);
stack = bpf_map__fd(skel->maps.stacks);
con->lost = skel->bss->lost;
+ stack_trace = zalloc(stack_size);
+ if (stack_trace == NULL)
+ return -1;
+
prev_key = 0;
while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
struct map *kmap;
struct symbol *sym;
int idx = 0;
+ /* to handle errors in the loop body */
+ err = -1;
+
bpf_map_lookup_elem(fd, &key, &data);
st = zalloc(sizeof(*st));
if (st == NULL)
- return -1;
+ break;
st->nr_contended = data.count;
st->wait_time_total = data.total_time;
@@ -163,25 +172,32 @@ int lock_contention_read(struct lock_contention *con)
st->name = strdup(sym->name);
if (ret < 0 || st->name == NULL)
- return -1;
+ break;
} else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) {
- free(st);
- return -1;
+ break;
}
if (verbose) {
- st->callstack = memdup(stack_trace, sizeof(stack_trace));
- if (st->callstack == NULL) {
- free(st);
- return -1;
- }
+ st->callstack = memdup(stack_trace, stack_size);
+ if (st->callstack == NULL)
+ break;
}
hlist_add_head(&st->hash_entry, con->result);
prev_key = key;
+
+ /* we're fine now, reset the values */
+ st = NULL;
+ err = 0;
}
- return 0;
+ free(stack_trace);
+ if (st) {
+ free(st->name);
+ free(st);
+ }
+
+ return err;
}
int lock_contention_finish(void)
diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h
index d6abd5e47af8..c2f7c13cba23 100644
--- a/tools/perf/util/bpf_map.h
+++ b/tools/perf/util/bpf_map.h
@@ -3,7 +3,6 @@
#define __PERF_BPF_MAP_H 1
#include <stdio.h>
-#include <linux/compiler.h>
struct bpf_map;
#ifdef HAVE_LIBBPF_SUPPORT
@@ -12,6 +11,8 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp);
#else
+#include <linux/compiler.h>
+
static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused)
{
return 0;
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 1bb8628e7c9f..9681cb59b0df 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -40,10 +40,10 @@ struct {
/* maintain timestamp at the beginning of contention */
struct {
- __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
- __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
__type(value, struct tstamp_data);
+ __uint(max_entries, MAX_ENTRIES);
} tstamp SEC(".maps");
/* actual lock contention statistics */
@@ -103,18 +103,28 @@ static inline int can_record(void)
SEC("tp_btf/contention_begin")
int contention_begin(u64 *ctx)
{
- struct task_struct *curr;
+ __u32 pid;
struct tstamp_data *pelem;
if (!enabled || !can_record())
return 0;
- curr = bpf_get_current_task_btf();
- pelem = bpf_task_storage_get(&tstamp, curr, NULL,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (!pelem || pelem->lock)
+ pid = bpf_get_current_pid_tgid();
+ pelem = bpf_map_lookup_elem(&tstamp, &pid);
+ if (pelem && pelem->lock)
return 0;
+ if (pelem == NULL) {
+ struct tstamp_data zero = {};
+
+ bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
+ pelem = bpf_map_lookup_elem(&tstamp, &pid);
+ if (pelem == NULL) {
+ lost++;
+ return 0;
+ }
+ }
+
pelem->timestamp = bpf_ktime_get_ns();
pelem->lock = (__u64)ctx[0];
pelem->flags = (__u32)ctx[1];
@@ -128,7 +138,7 @@ int contention_begin(u64 *ctx)
SEC("tp_btf/contention_end")
int contention_end(u64 *ctx)
{
- struct task_struct *curr;
+ __u32 pid;
struct tstamp_data *pelem;
struct contention_key key;
struct contention_data *data;
@@ -137,8 +147,8 @@ int contention_end(u64 *ctx)
if (!enabled)
return 0;
- curr = bpf_get_current_task_btf();
- pelem = bpf_task_storage_get(&tstamp, curr, NULL, 0);
+ pid = bpf_get_current_pid_tgid();
+ pelem = bpf_map_lookup_elem(&tstamp, &pid);
if (!pelem || pelem->lock != ctx[0])
return 0;
@@ -156,7 +166,7 @@ int contention_end(u64 *ctx)
};
bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
- pelem->lock = 0;
+ bpf_map_delete_elem(&tstamp, &pid);
return 0;
}
@@ -169,7 +179,7 @@ int contention_end(u64 *ctx)
if (data->min_time > duration)
data->min_time = duration;
- pelem->lock = 0;
+ bpf_map_delete_elem(&tstamp, &pid);
return 0;
}
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index f838b23db180..d6017c9b1872 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -7,12 +7,10 @@
* detected in at least musl libc, used in Alpine Linux. -acme
*/
#include <stdio.h>
-#include <stdint.h>
-#include <linux/compiler.h>
-#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/types.h>
-#include "event.h"
+#include "util/map_symbol.h"
+#include "util/sample.h"
struct branch_flags {
union {
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 3f2ae19a1dd4..658170b8dcef 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -556,7 +556,7 @@ static char *home_perfconfig(void)
config = strdup(mkpath("%s/.perfconfig", home));
if (config == NULL) {
- pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
+ pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home);
return NULL;
}
@@ -564,7 +564,7 @@ static char *home_perfconfig(void)
goto out_free;
if (st.st_uid && (st.st_uid != geteuid())) {
- pr_warning("File %s not owned by current user or root, ignoring it.", config);
+ pr_warning("File %s not owned by current user or root, ignoring it.\n", config);
goto out_free;
}
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 7a447d918458..11cd85b278a6 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts)
{
xyarray__reset(counts->loaded);
xyarray__reset(counts->values);
- memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
}
void evsel__reset_counts(struct evsel *evsel)
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 5de275194f2b..42760242e0df 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -11,7 +11,6 @@ struct evsel;
struct perf_counts {
s8 scaled;
- struct perf_counts_values aggr;
struct xyarray *values;
struct xyarray *loaded;
};
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 8486ca3bec75..5e564974fba4 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -12,6 +12,7 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
+#include <internal/cpumap.h>
static struct perf_cpu max_cpu_num;
static struct perf_cpu max_present_cpu_num;
@@ -234,7 +235,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
aggr_cpu_id_get_t get_id,
- void *data)
+ void *data, bool needs_sort)
{
int idx;
struct perf_cpu cpu;
@@ -270,8 +271,10 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
if (trimmed_c)
c = trimmed_c;
}
+
/* ensure we process id in increasing order */
- qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
+ if (needs_sort)
+ qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
return c;
@@ -354,6 +357,16 @@ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unu
return id;
}
+struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data __maybe_unused)
+{
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ /* it always aggregates to the cpu 0 */
+ cpu.cpu = 0;
+ id.cpu = cpu;
+ return id;
+}
+
/* setup simple routines to easily access node numbers given a cpu number */
static int get_max_num(char *path, int *max)
{
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 4a6d029576ee..c2f5824a3a22 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -4,8 +4,8 @@
#include <stdbool.h>
#include <stdio.h>
-#include <internal/cpumap.h>
#include <perf/cpumap.h>
+#include <linux/refcount.h>
/** Identify where counts are aggregated, -1 implies not to aggregate. */
struct aggr_cpu_id {
@@ -97,7 +97,7 @@ typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data)
*/
struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
aggr_cpu_id_get_t get_id,
- void *data);
+ void *data, bool needs_sort);
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b);
bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a);
@@ -133,5 +133,9 @@ struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data);
* cpu. The function signature is compatible with aggr_cpu_id_get_t.
*/
struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data);
-
+/**
+ * aggr_cpu_id__global - Create an aggr_cpu_id for global aggregation.
+ * The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data);
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 9e0aee276df8..c65cdaf6975e 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -34,6 +34,7 @@
#include <linux/time64.h>
#include "util.h"
#include "clockid.h"
+#include "util/sample.h"
#define pr_N(n, fmt, ...) \
eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 613d6ae82663..57db59068cb6 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -217,6 +217,26 @@ static int process_sample_event(struct perf_tool *tool,
}
output_json_format(out, false, 3, "]");
+ if (sample->raw_data) {
+ int i;
+ struct tep_format_field **fields;
+
+ fields = tep_event_fields(evsel->tp_format);
+ if (fields) {
+ i = 0;
+ while (fields[i]) {
+ struct trace_seq s;
+
+ trace_seq_init(&s);
+ tep_print_field(&s, sample->raw_data, fields[i]);
+ output_json_key_string(out, true, 3, fields[i]->name, s.buffer);
+
+ i++;
+ }
+ free(fields);
+ }
+ }
+
output_json_format(out, false, 2, "}");
return 0;
}
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 609ca1671501..b07414409771 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -123,7 +123,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr,
if (die_find_realfunc(cu_die, addr, &die_mem)
&& die_entrypc(&die_mem, &faddr) == 0 &&
faddr == addr) {
- *fname = dwarf_decl_file(&die_mem);
+ *fname = die_get_decl_file(&die_mem);
dwarf_decl_line(&die_mem, lineno);
goto out;
}
@@ -137,7 +137,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr,
}
out:
- return *lineno ?: -ENOENT;
+ return (*lineno && *fname) ? *lineno : -ENOENT;
}
static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
@@ -308,26 +308,13 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
{
Dwarf_Attribute attr;
- if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
dwarf_formudata(&attr, result) != 0)
return -ENOENT;
return 0;
}
-/* Get attribute and translate it as a sdata */
-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
- Dwarf_Sword *result)
-{
- Dwarf_Attribute attr;
-
- if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
- dwarf_formsdata(&attr, result) != 0)
- return -ENOENT;
-
- return 0;
-}
-
/**
* die_is_signed_type - Check whether a type DIE is signed or not
* @tp_die: a DIE of a type
@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
/* Get the call file index number in CU DIE */
static int die_get_call_fileno(Dwarf_Die *in_die)
{
- Dwarf_Sword idx;
+ Dwarf_Word idx;
- if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
+ if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
return (int)idx;
else
return -ENOENT;
@@ -478,14 +465,27 @@ static int die_get_call_fileno(Dwarf_Die *in_die)
/* Get the declared file index number in CU DIE */
static int die_get_decl_fileno(Dwarf_Die *pdie)
{
- Dwarf_Sword idx;
+ Dwarf_Word idx;
- if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
+ if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
return (int)idx;
else
return -ENOENT;
}
+/* Return the file name by index */
+static const char *die_get_file_name(Dwarf_Die *dw_die, int idx)
+{
+ Dwarf_Die cu_die;
+ Dwarf_Files *files;
+
+ if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) ||
+ dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
+ return NULL;
+
+ return dwarf_filesrc(files, idx, NULL, NULL);
+}
+
/**
* die_get_call_file - Get callsite file name of inlined function instance
* @in_die: a DIE of an inlined function instance
@@ -495,18 +495,22 @@ static int die_get_decl_fileno(Dwarf_Die *pdie)
*/
const char *die_get_call_file(Dwarf_Die *in_die)
{
- Dwarf_Die cu_die;
- Dwarf_Files *files;
- int idx;
-
- idx = die_get_call_fileno(in_die);
- if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) ||
- dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
- return NULL;
-
- return dwarf_filesrc(files, idx, NULL, NULL);
+ return die_get_file_name(in_die, die_get_call_fileno(in_die));
}
+/**
+ * die_get_decl_file - Find the declared file name of this DIE
+ * @dw_die: a DIE for something declared.
+ *
+ * Get declared file name of @dw_die.
+ * NOTE: Since some version of clang DWARF5 implementation incorrectly uses
+ * file index 0 for DW_AT_decl_file, die_get_decl_file() will return NULL for
+ * such cases. Use this function instead.
+ */
+const char *die_get_decl_file(Dwarf_Die *dw_die)
+{
+ return die_get_file_name(dw_die, die_get_decl_fileno(dw_die));
+}
/**
* die_find_child - Generic DIE search function in DIE tree
@@ -790,7 +794,7 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
}
if (addr) {
- fname = dwarf_decl_file(in_die);
+ fname = die_get_decl_file(in_die);
if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
lw->retval = lw->callback(fname, lineno, addr, lw->data);
if (lw->retval != 0)
@@ -818,7 +822,7 @@ static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
int lineno;
/* Handle function declaration line */
- fname = dwarf_decl_file(sp_die);
+ fname = die_get_decl_file(sp_die);
if (fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
die_entrypc(sp_die, &addr) == 0) {
lw.retval = callback(fname, lineno, addr, data);
@@ -873,7 +877,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
dwarf_decl_line(rt_die, &decl);
- decf = dwarf_decl_file(rt_die);
+ decf = die_get_decl_file(rt_die);
+ if (!decf) {
+ pr_debug2("Failed to get the declared file name of %s\n",
+ dwarf_diename(rt_die));
+ return -EINVAL;
+ }
} else
cu_die = rt_die;
if (!cu_die) {
@@ -923,7 +932,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
dwarf_decl_line(&die_mem, &inl);
if (inl != decl ||
- decf != dwarf_decl_file(&die_mem))
+ decf != die_get_decl_file(&die_mem))
continue;
}
}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 7ee0fa19b5c4..7ec8bc1083bb 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -50,6 +50,9 @@ int die_get_call_lineno(Dwarf_Die *in_die);
/* Get callsite file name of inlined function instance */
const char *die_get_call_file(Dwarf_Die *in_die);
+/* Get declared file name of a DIE */
+const char *die_get_decl_file(Dwarf_Die *dw_die);
+
/* Get type die */
Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 12eae6917022..6663a676eadc 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -12,11 +12,10 @@
#include <perf/event.h>
#include <linux/types.h>
-#include "perf_regs.h"
-
struct dso;
struct machine;
struct perf_event_attr;
+struct perf_sample;
#ifdef __LP64__
/*
@@ -44,61 +43,6 @@ struct perf_event_attr;
/* perf sample has 16 bits size limit */
#define PERF_SAMPLE_MAX_SIZE (1 << 16)
-/* number of register is bound by the number of bits in regs_dump::mask (64) */
-#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
-
-struct regs_dump {
- u64 abi;
- u64 mask;
- u64 *regs;
-
- /* Cached values/mask filled by first register access. */
- u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
- u64 cache_mask;
-};
-
-struct stack_dump {
- u16 offset;
- u64 size;
- char *data;
-};
-
-struct sample_read_value {
- u64 value;
- u64 id; /* only if PERF_FORMAT_ID */
- u64 lost; /* only if PERF_FORMAT_LOST */
-};
-
-struct sample_read {
- u64 time_enabled;
- u64 time_running;
- union {
- struct {
- u64 nr;
- struct sample_read_value *values;
- } group;
- struct sample_read_value one;
- };
-};
-
-static inline size_t sample_read_value_size(u64 read_format)
-{
- /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
- if (read_format & PERF_FORMAT_LOST)
- return sizeof(struct sample_read_value);
- else
- return offsetof(struct sample_read_value, lost);
-}
-
-static inline struct sample_read_value *
-next_sample_read_value(struct sample_read_value *v, u64 read_format)
-{
- return (void *)v + sample_read_value_size(read_format);
-}
-
-#define sample_read_group__for_each(v, nr, rf) \
- for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++)
-
struct ip_callchain {
u64 nr;
u64 ips[];
@@ -140,52 +84,6 @@ enum {
PERF_IP_FLAG_VMENTRY |\
PERF_IP_FLAG_VMEXIT)
-#define MAX_INSN 16
-
-struct aux_sample {
- u64 size;
- void *data;
-};
-
-struct perf_sample {
- u64 ip;
- u32 pid, tid;
- u64 time;
- u64 addr;
- u64 id;
- u64 stream_id;
- u64 period;
- u64 weight;
- u64 transaction;
- u64 insn_cnt;
- u64 cyc_cnt;
- u32 cpu;
- u32 raw_size;
- u64 data_src;
- u64 phys_addr;
- u64 data_page_size;
- u64 code_page_size;
- u64 cgroup;
- u32 flags;
- u32 machine_pid;
- u32 vcpu;
- u16 insn_len;
- u8 cpumode;
- u16 misc;
- u16 ins_lat;
- u16 p_stage_cyc;
- bool no_hw_idx; /* No hw_idx collected in branch_stack */
- char insn[MAX_INSN];
- void *raw_data;
- struct ip_callchain *callchain;
- struct branch_stack *branch_stack;
- struct regs_dump user_regs;
- struct regs_dump intr_regs;
- struct stack_dump user_stack;
- struct sample_read read;
- struct aux_sample aux_sample;
-};
-
#define PERF_MEM_DATA_SRC_NONE \
(PERF_MEM_S(OP, NA) |\
PERF_MEM_S(LVL, NA) |\
@@ -344,15 +242,6 @@ struct perf_synth_intel_iflag_chg {
u64 branch_ip; /* If via_branch */
};
-/*
- * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
- * 8-byte alignment.
- */
-static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
-{
- return sample->raw_data - 4;
-}
-
static inline void *perf_synth__raw_data(void *p)
{
return p + 4;
@@ -446,19 +335,8 @@ int perf_event__process(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine);
-struct addr_location;
-
-int machine__resolve(struct machine *machine, struct addr_location *al,
- struct perf_sample *sample);
-
-void addr_location__put(struct addr_location *al);
-
-struct thread;
-
bool is_bts_event(struct perf_event_attr *attr);
bool sample_addr_correlates_sym(struct perf_event_attr *attr);
-void thread__resolve(struct thread *thread, struct addr_location *al,
- struct perf_sample *sample);
const char *perf_event__name(unsigned int id);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6612b00949e7..fbf3192bced9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -24,11 +24,13 @@
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
+#include "util/event.h"
#include "util/string2.h"
#include "util/perf_api_probe.h"
#include "util/evsel_fprintf.h"
#include "util/evlist-hybrid.h"
#include "util/pmu.h"
+#include "util/sample.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 76605fde3507..45f4f08399ae 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,17 +46,14 @@
#include "string2.h"
#include "memswap.h"
#include "util.h"
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
#include "util/hashmap.h"
-#endif
#include "pmu-hybrid.h"
#include "off_cpu.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include <internal/xyarray.h>
#include <internal/lib.h>
+#include <internal/threadmap.h>
#include <linux/ctype.h>
@@ -467,6 +464,7 @@ struct evsel *evsel__clone(struct evsel *orig)
evsel->collect_stat = orig->collect_stat;
evsel->weak_group = orig->weak_group;
evsel->use_config_name = orig->use_config_name;
+ evsel->pmu = orig->pmu;
if (evsel__copy_config_terms(evsel, orig) < 0)
goto out_err;
@@ -1525,13 +1523,8 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
if (!evsel->prev_raw_counts)
return;
- if (cpu_map_idx == -1) {
- tmp = evsel->prev_raw_counts->aggr;
- evsel->prev_raw_counts->aggr = *count;
- } else {
- tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
- *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
- }
+ tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+ *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
count->val = count->val - tmp.val;
count->ena = count->ena - tmp.ena;
@@ -1966,17 +1959,16 @@ bool evsel__detect_missing_features(struct evsel *evsel)
perf_missing_features.mmap2 = true;
pr_debug2_peo("switching off mmap2\n");
return true;
- } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
- (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) {
- if (evsel->pmu == NULL) {
+ } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) {
+ if (evsel->pmu == NULL)
evsel->pmu = evsel__find_pmu(evsel);
- if (evsel->pmu)
- evsel->pmu->missing_features.exclude_guest = true;
- else {
- /* we cannot find PMU, disable attrs now */
- evsel->core.attr.exclude_host = false;
- evsel->core.attr.exclude_guest = false;
- }
+
+ if (evsel->pmu)
+ evsel->pmu->missing_features.exclude_guest = true;
+ else {
+ /* we cannot find PMU, disable attrs now */
+ evsel->core.attr.exclude_host = false;
+ evsel->core.attr.exclude_guest = false;
}
if (evsel->exclude_GH) {
@@ -3129,7 +3121,7 @@ void evsel__zero_per_pkg(struct evsel *evsel)
}
}
-bool evsel__is_hybrid(struct evsel *evsel)
+bool evsel__is_hybrid(const struct evsel *evsel)
{
return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 989865e16aad..f3485799ddf9 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -10,8 +10,6 @@
#include <internal/evsel.h>
#include <perf/evsel.h>
#include "symbol_conf.h"
-#include <internal/cpumap.h>
-#include <perf/cpumap.h>
struct bpf_object;
struct cgroup;
@@ -498,7 +496,7 @@ struct perf_env *evsel__env(struct evsel *evsel);
int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
void evsel__zero_per_pkg(struct evsel *evsel);
-bool evsel__is_hybrid(struct evsel *evsel);
+bool evsel__is_hybrid(const struct evsel *evsel);
struct evsel *evsel__leader(struct evsel *evsel);
bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
bool evsel__is_leader(struct evsel *evsel);
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index aaacf514dc09..140f2acdb325 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -11,6 +11,7 @@
#include "expr.h"
#include "expr-bison.h"
#include "expr-flex.h"
+#include "util/hashmap.h"
#include "smt.h"
#include "tsc.h"
#include <linux/err.h>
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index d6c1668dc1a0..029271540fb0 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,12 +2,7 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
-#include "util/hashmap.h"
-#endif
-
+struct hashmap;
struct metric_ref;
struct expr_scanner_ctx {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 1376077183f7..22308dd93010 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -18,6 +18,7 @@
#include "intel-pt-insn-decoder.h"
#include "dump-insn.h"
+#include "util/sample.h"
#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
#error Instruction buffer size too small
diff --git a/tools/perf/util/iostat.c b/tools/perf/util/iostat.c
index 57dd49da28fe..b770bd473af7 100644
--- a/tools/perf/util/iostat.c
+++ b/tools/perf/util/iostat.c
@@ -48,6 +48,7 @@ __weak void iostat_print_counters(struct evlist *evlist __maybe_unused,
struct perf_stat_config *config __maybe_unused,
struct timespec *ts __maybe_unused,
char *prefix __maybe_unused,
- iostat_print_counter_t print_cnt_cb __maybe_unused)
+ iostat_print_counter_t print_cnt_cb __maybe_unused,
+ void *arg __maybe_unused)
{
}
diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h
index 23c1c46a331a..a4e7299c5c2f 100644
--- a/tools/perf/util/iostat.h
+++ b/tools/perf/util/iostat.h
@@ -28,7 +28,7 @@ enum iostat_mode_t {
extern enum iostat_mode_t iostat_mode;
-typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, char *);
+typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, void *);
int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config);
int iostat_parse(const struct option *opt, const char *str,
@@ -42,6 +42,6 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
struct perf_stat_output_ctx *out);
void iostat_print_counters(struct evlist *evlist,
struct perf_stat_config *config, struct timespec *ts,
- char *prefix, iostat_print_counter_t print_cnt_cb);
+ char *prefix, iostat_print_counter_t print_cnt_cb, void *arg);
#endif /* _IOSTAT_H */
diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h
index 320c0a6d2e08..53b7327550b8 100644
--- a/tools/perf/util/kwork.h
+++ b/tools/perf/util/kwork.h
@@ -1,16 +1,16 @@
#ifndef PERF_UTIL_KWORK_H
#define PERF_UTIL_KWORK_H
-#include "perf.h"
-
#include "util/tool.h"
-#include "util/event.h"
-#include "util/evlist.h"
-#include "util/session.h"
#include "util/time-utils.h"
-#include <linux/list.h>
#include <linux/bitmap.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct perf_sample;
+struct perf_session;
enum kwork_class_type {
KWORK_CLASS_IRQ,
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 2dc797007419..650ffe336f3a 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -463,7 +463,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
char *pipe_template = NULL;
const char *opts = llvm_param.opts;
char *command_echo = NULL, *command_out;
- char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
+ char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR);
if (path[0] != '-' && realpath(path, abspath) == NULL) {
err = errno;
@@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
snprintf(linux_version_code_str, sizeof(linux_version_code_str),
"0x%x", kernel_version);
- if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
+ if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0)
goto errout;
force_set_env("NR_CPUS", nr_cpus_avail_str);
force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
@@ -556,7 +556,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
free(kbuild_dir);
free(kbuild_include_opts);
free(perf_bpf_include_opts);
- free(perf_include_dir);
+ free(libbpf_include_dir);
if (!p_obj_buf)
free(obj_buf);
@@ -572,7 +572,7 @@ errout:
free(kbuild_include_opts);
free(obj_buf);
free(perf_bpf_include_opts);
- free(perf_include_dir);
+ free(libbpf_include_dir);
free(pipe_template);
if (p_obj_buf)
*p_obj_buf = NULL;
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index b8cb8830b7bc..e3c061b1795b 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -91,7 +91,7 @@ struct thread_stat {
* Number of stack trace entries to skip when finding callers.
* The first few entries belong to the locking implementation itself.
*/
-#define CONTENTION_STACK_SKIP 3
+#define CONTENTION_STACK_SKIP 4
/*
* flags for lock:contention_begin
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 74935dfaa937..6267c1d6f232 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -305,4 +305,7 @@ int machine__create_extra_kernel_map(struct machine *machine,
int machine__map_x86_64_entry_trampolines(struct machine *machine,
struct dso *kernel);
+int machine__resolve(struct machine *machine, struct addr_location *al,
+ struct perf_sample *sample);
+
#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 4c98ac29ee13..6eac7a60ed27 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -12,6 +12,7 @@
#include "strbuf.h"
#include "pmu.h"
#include "pmu-hybrid.h"
+#include "print-events.h"
#include "expr.h"
#include "rblist.h"
#include <string.h>
@@ -28,6 +29,7 @@
#include "util.h"
#include <asm/bug.h>
#include "cgroup.h"
+#include "util/hashmap.h"
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -352,51 +354,65 @@ static bool match_pe_metric(const struct pmu_event *pe, const char *metric)
match_metric(pe->metric_name, metric);
}
+/** struct mep - RB-tree node for building printing information. */
struct mep {
+ /** nd - RB-tree element. */
struct rb_node nd;
- const char *name;
- struct strlist *metrics;
+ /** @metric_group: Owned metric group name, separated others with ';'. */
+ char *metric_group;
+ const char *metric_name;
+ const char *metric_desc;
+ const char *metric_long_desc;
+ const char *metric_expr;
+ const char *metric_unit;
};
static int mep_cmp(struct rb_node *rb_node, const void *entry)
{
struct mep *a = container_of(rb_node, struct mep, nd);
struct mep *b = (struct mep *)entry;
+ int ret;
- return strcmp(a->name, b->name);
+ ret = strcmp(a->metric_group, b->metric_group);
+ if (ret)
+ return ret;
+
+ return strcmp(a->metric_name, b->metric_name);
}
-static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
- const void *entry)
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry)
{
struct mep *me = malloc(sizeof(struct mep));
if (!me)
return NULL;
+
memcpy(me, entry, sizeof(struct mep));
- me->name = strdup(me->name);
- if (!me->name)
- goto out_me;
- me->metrics = strlist__new(NULL, NULL);
- if (!me->metrics)
- goto out_name;
return &me->nd;
-out_name:
- zfree(&me->name);
-out_me:
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+ struct rb_node *nd)
+{
+ struct mep *me = container_of(nd, struct mep, nd);
+
+ zfree(&me->metric_group);
free(me);
- return NULL;
}
-static struct mep *mep_lookup(struct rblist *groups, const char *name)
+static struct mep *mep_lookup(struct rblist *groups, const char *metric_group,
+ const char *metric_name)
{
struct rb_node *nd;
struct mep me = {
- .name = name
+ .metric_group = strdup(metric_group),
+ .metric_name = metric_name,
};
nd = rblist__find(groups, &me);
- if (nd)
+ if (nd) {
+ free(me.metric_group);
return container_of(nd, struct mep, nd);
+ }
rblist__add_node(groups, &me);
nd = rblist__find(groups, &me);
if (nd)
@@ -404,107 +420,37 @@ static struct mep *mep_lookup(struct rblist *groups, const char *name)
return NULL;
}
-static void mep_delete(struct rblist *rl __maybe_unused,
- struct rb_node *nd)
-{
- struct mep *me = container_of(nd, struct mep, nd);
-
- strlist__delete(me->metrics);
- zfree(&me->name);
- free(me);
-}
-
-static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
-{
- struct str_node *sn;
- int n = 0;
-
- strlist__for_each_entry (sn, metrics) {
- if (raw)
- printf("%s%s", n > 0 ? " " : "", sn->s);
- else
- printf(" %s\n", sn->s);
- n++;
- }
- if (raw)
- putchar('\n');
-}
-
-static int metricgroup__print_pmu_event(const struct pmu_event *pe,
- bool metricgroups, char *filter,
- bool raw, bool details,
- struct rblist *groups,
- struct strlist *metriclist)
+static int metricgroup__add_to_mep_groups(const struct pmu_event *pe,
+ struct rblist *groups)
{
const char *g;
char *omg, *mg;
- g = pe->metric_group;
- if (!g && pe->metric_name) {
- if (pe->name)
- return 0;
- g = "No_group";
- }
-
- if (!g)
- return 0;
-
- mg = strdup(g);
-
+ mg = strdup(pe->metric_group ?: "No_group");
if (!mg)
return -ENOMEM;
omg = mg;
while ((g = strsep(&mg, ";")) != NULL) {
struct mep *me;
- char *s;
g = skip_spaces(g);
- if (*g == 0)
- g = "No_group";
- if (filter && !strstr(g, filter))
- continue;
- if (raw)
- s = (char *)pe->metric_name;
- else {
- if (asprintf(&s, "%s\n%*s%s]",
- pe->metric_name, 8, "[", pe->desc) < 0)
- return -1;
- if (details) {
- if (asprintf(&s, "%s\n%*s%s]",
- s, 8, "[", pe->metric_expr) < 0)
- return -1;
- }
- }
-
- if (!s)
- continue;
+ if (strlen(g))
+ me = mep_lookup(groups, g, pe->metric_name);
+ else
+ me = mep_lookup(groups, "No_group", pe->metric_name);
- if (!metricgroups) {
- strlist__add(metriclist, s);
- } else {
- me = mep_lookup(groups, g);
- if (!me)
- continue;
- strlist__add(me->metrics, s);
+ if (me) {
+ me->metric_desc = pe->desc;
+ me->metric_long_desc = pe->long_desc;
+ me->metric_expr = pe->metric_expr;
+ me->metric_unit = pe->unit;
}
-
- if (!raw)
- free(s);
}
free(omg);
return 0;
}
-struct metricgroup_print_sys_idata {
- struct strlist *metriclist;
- char *filter;
- struct rblist *groups;
- bool metricgroups;
- bool raw;
- bool details;
-};
-
struct metricgroup_iter_data {
pmu_event_iter_fn fn;
void *data;
@@ -527,60 +473,26 @@ static int metricgroup__sys_event_iter(const struct pmu_event *pe,
return d->fn(pe, table, d->data);
}
-
return 0;
}
-static int metricgroup__print_sys_event_iter(const struct pmu_event *pe,
- const struct pmu_events_table *table __maybe_unused,
- void *data)
-{
- struct metricgroup_print_sys_idata *d = data;
-
- return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw,
- d->details, d->groups, d->metriclist);
-}
-
-struct metricgroup_print_data {
- const char *pmu_name;
- struct strlist *metriclist;
- char *filter;
- struct rblist *groups;
- bool metricgroups;
- bool raw;
- bool details;
-};
-
-static int metricgroup__print_callback(const struct pmu_event *pe,
- const struct pmu_events_table *table __maybe_unused,
- void *vdata)
+static int metricgroup__add_to_mep_groups_callback(const struct pmu_event *pe,
+ const struct pmu_events_table *table __maybe_unused,
+ void *vdata)
{
- struct metricgroup_print_data *data = vdata;
+ struct rblist *groups = vdata;
- if (!pe->metric_expr)
- return 0;
-
- if (data->pmu_name && perf_pmu__is_hybrid(pe->pmu) && strcmp(data->pmu_name, pe->pmu))
+ if (!pe->metric_name)
return 0;
- return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter,
- data->raw, data->details, data->groups,
- data->metriclist);
+ return metricgroup__add_to_mep_groups(pe, groups);
}
-void metricgroup__print(bool metrics, bool metricgroups, char *filter,
- bool raw, bool details, const char *pmu_name)
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state)
{
struct rblist groups;
- struct rb_node *node, *next;
- struct strlist *metriclist = NULL;
const struct pmu_events_table *table;
-
- if (!metricgroups) {
- metriclist = strlist__new(NULL, NULL);
- if (!metriclist)
- return;
- }
+ struct rb_node *node, *next;
rblist__init(&groups);
groups.node_new = mep_new;
@@ -588,56 +500,31 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
groups.node_delete = mep_delete;
table = pmu_events_table__find();
if (table) {
- struct metricgroup_print_data data = {
- .pmu_name = pmu_name,
- .metriclist = metriclist,
- .metricgroups = metricgroups,
- .filter = filter,
- .raw = raw,
- .details = details,
- .groups = &groups,
- };
-
pmu_events_table_for_each_event(table,
- metricgroup__print_callback,
- &data);
+ metricgroup__add_to_mep_groups_callback,
+ &groups);
}
{
struct metricgroup_iter_data data = {
- .fn = metricgroup__print_sys_event_iter,
- .data = (void *) &(struct metricgroup_print_sys_idata){
- .metriclist = metriclist,
- .metricgroups = metricgroups,
- .filter = filter,
- .raw = raw,
- .details = details,
- .groups = &groups,
- },
+ .fn = metricgroup__add_to_mep_groups_callback,
+ .data = &groups,
};
-
pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
}
- if (!filter || !rblist__empty(&groups)) {
- if (metricgroups && !raw)
- printf("\nMetric Groups:\n\n");
- else if (metrics && !raw)
- printf("\nMetrics:\n\n");
- }
-
for (node = rb_first_cached(&groups.entries); node; node = next) {
struct mep *me = container_of(node, struct mep, nd);
- if (metricgroups)
- printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n");
- if (metrics)
- metricgroup__print_strlist(me->metrics, raw);
+ print_cb->print_metric(print_state,
+ me->metric_group,
+ me->metric_name,
+ me->metric_desc,
+ me->metric_long_desc,
+ me->metric_expr,
+ me->metric_unit);
next = rb_next(node);
rblist__remove_node(&groups, node);
}
- if (!metricgroups)
- metricgroup__print_strlist(metriclist, raw);
- strlist__delete(metriclist);
}
static const char *code_characters = ",-=@";
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 732d3a0d3334..0013cf582173 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -10,6 +10,7 @@
struct evlist;
struct evsel;
struct option;
+struct print_callbacks;
struct rblist;
struct cgroup;
@@ -78,8 +79,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
bool metric_no_merge,
struct rblist *metric_events);
-void metricgroup__print(bool metrics, bool groups, char *filter,
- bool raw, bool details, const char *pmu_name);
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
bool metricgroup__has_metric(const char *metric);
int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused);
void metricgroup__rblist_exit(struct rblist *metric_events);
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index cd4ccec7f361..f944c3cd5efa 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -2,18 +2,13 @@
#define __PERF_MMAP_H 1
#include <internal/mmap.h>
-#include <linux/compiler.h>
-#include <linux/refcount.h>
#include <linux/types.h>
-#include <linux/ring_buffer.h>
#include <linux/bitops.h>
#include <perf/cpumap.h>
-#include <stdbool.h>
#ifdef HAVE_AIO_SUPPORT
#include <aio.h>
#endif
#include "auxtrace.h"
-#include "event.h"
#include "util/compress.h"
struct aiocb;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5973f46c2375..6502cd679f57 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -266,6 +266,7 @@ __add_event(struct list_head *list, int *idx,
evsel->core.own_cpus = perf_cpu_map__get(cpus);
evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
evsel->auto_merge_stats = auto_merge_stats;
+ evsel->pmu = pmu;
if (name)
evsel->name = strdup(name);
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 872dd3d38782..57a567ee2cea 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -2,7 +2,7 @@
#include <errno.h>
#include <string.h>
#include "perf_regs.h"
-#include "event.h"
+#include "util/sample.h"
int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
char **new_op __maybe_unused)
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index f0bcfcab1a93..ac3227ba769c 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -12,6 +12,7 @@
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/pfm.h"
+#include "util/strbuf.h"
#include <string.h>
#include <linux/kernel.h>
@@ -130,53 +131,36 @@ static const char *srcs[PFM_ATTR_CTRL_MAX] = {
};
static void
-print_attr_flags(pfm_event_attr_info_t *info)
+print_attr_flags(struct strbuf *buf, const pfm_event_attr_info_t *info)
{
- int n = 0;
+ if (info->is_dfl)
+ strbuf_addf(buf, "[default] ");
- if (info->is_dfl) {
- printf("[default] ");
- n++;
- }
-
- if (info->is_precise) {
- printf("[precise] ");
- n++;
- }
-
- if (!n)
- printf("- ");
+ if (info->is_precise)
+ strbuf_addf(buf, "[precise] ");
}
static void
-print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
+print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
+ const pfm_pmu_info_t *pinfo, const pfm_event_info_t *info,
+ struct strbuf *buf)
{
- pfm_event_attr_info_t ainfo;
- const char *src;
int j, ret;
+ char topic[80], name[80];
- ainfo.size = sizeof(ainfo);
+ strbuf_setlen(buf, 0);
+ snprintf(topic, sizeof(topic), "pfm %s", pinfo->name);
- printf(" %s\n", info->name);
- printf(" [%s]\n", info->desc);
- if (long_desc) {
- if (info->equiv)
- printf(" Equiv: %s\n", info->equiv);
+ snprintf(name, sizeof(name), "%s::%s", pinfo->name, info->name);
+ strbuf_addf(buf, "Code: 0x%"PRIx64"\n", info->code);
- printf(" Code : 0x%"PRIx64"\n", info->code);
- }
pfm_for_each_event_attr(j, info) {
- ret = pfm_get_event_attr_info(info->idx, j,
- PFM_OS_PERF_EVENT_EXT, &ainfo);
- if (ret != PFM_SUCCESS)
- continue;
-
- if (ainfo.type == PFM_ATTR_UMASK) {
- printf(" %s:%s\n", info->name, ainfo.name);
- printf(" [%s]\n", ainfo.desc);
- }
+ pfm_event_attr_info_t ainfo;
+ const char *src;
- if (!long_desc)
+ ainfo.size = sizeof(ainfo);
+ ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo);
+ if (ret != PFM_SUCCESS)
continue;
if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
@@ -184,64 +168,74 @@ print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
src = srcs[ainfo.ctrl];
switch (ainfo.type) {
- case PFM_ATTR_UMASK:
- printf(" Umask : 0x%02"PRIx64" : %s: ",
- ainfo.code, src);
- print_attr_flags(&ainfo);
- putchar('\n');
+ case PFM_ATTR_UMASK: /* Ignore for now */
break;
case PFM_ATTR_MOD_BOOL:
- printf(" Modif : %s: [%s] : %s (boolean)\n", src,
- ainfo.name, ainfo.desc);
+ strbuf_addf(buf, " Modif: %s: [%s] : %s (boolean)\n", src,
+ ainfo.name, ainfo.desc);
break;
case PFM_ATTR_MOD_INTEGER:
- printf(" Modif : %s: [%s] : %s (integer)\n", src,
- ainfo.name, ainfo.desc);
+ strbuf_addf(buf, " Modif: %s: [%s] : %s (integer)\n", src,
+ ainfo.name, ainfo.desc);
break;
case PFM_ATTR_NONE:
case PFM_ATTR_RAW_UMASK:
case PFM_ATTR_MAX:
default:
- printf(" Attr : %s: [%s] : %s\n", src,
- ainfo.name, ainfo.desc);
+ strbuf_addf(buf, " Attr: %s: [%s] : %s\n", src,
+ ainfo.name, ainfo.desc);
}
}
-}
+ print_cb->print_event(print_state,
+ pinfo->name,
+ topic,
+ name, info->equiv,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/NULL, "PFM event",
+ info->desc, /*long_desc=*/NULL,
+ /*encoding_desc=*/buf->buf,
+ /*metric_name=*/NULL, /*metric_expr=*/NULL);
-/*
- * list all pmu::event:umask, pmu::event
- * printed events may not be all valid combinations of umask for an event
- */
-static void
-print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info)
-{
- pfm_event_attr_info_t ainfo;
- int j, ret;
- bool has_umask = false;
+ pfm_for_each_event_attr(j, info) {
+ pfm_event_attr_info_t ainfo;
+ const char *src;
- ainfo.size = sizeof(ainfo);
+ strbuf_setlen(buf, 0);
- pfm_for_each_event_attr(j, info) {
- ret = pfm_get_event_attr_info(info->idx, j,
- PFM_OS_PERF_EVENT_EXT, &ainfo);
+ ainfo.size = sizeof(ainfo);
+ ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo);
if (ret != PFM_SUCCESS)
continue;
- if (ainfo.type != PFM_ATTR_UMASK)
- continue;
+ if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+ ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
- printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name);
- has_umask = true;
+ src = srcs[ainfo.ctrl];
+ if (ainfo.type == PFM_ATTR_UMASK) {
+ strbuf_addf(buf, "Umask: 0x%02"PRIx64" : %s: ",
+ ainfo.code, src);
+ print_attr_flags(buf, &ainfo);
+ snprintf(name, sizeof(name), "%s::%s:%s",
+ pinfo->name, info->name, ainfo.name);
+ print_cb->print_event(print_state,
+ pinfo->name,
+ topic,
+ name, /*alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/NULL, "PFM event",
+ ainfo.desc, /*long_desc=*/NULL,
+ /*encoding_desc=*/buf->buf,
+ /*metric_name=*/NULL, /*metric_expr=*/NULL);
+ }
}
- if (!has_umask)
- printf("%s::%s\n", pinfo->name, info->name);
}
-void print_libpfm_events(bool name_only, bool long_desc)
+void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state)
{
pfm_event_info_t info;
pfm_pmu_info_t pinfo;
- int i, p, ret;
+ int p, ret;
+ struct strbuf storage;
libpfm_initialize();
@@ -249,12 +243,9 @@ void print_libpfm_events(bool name_only, bool long_desc)
info.size = sizeof(info);
pinfo.size = sizeof(pinfo);
- if (!name_only)
- puts("\nList of pre-defined events (to be used in --pfm-events):\n");
+ strbuf_init(&storage, 2048);
pfm_for_all_pmus(p) {
- bool printed_pmu = false;
-
ret = pfm_get_pmu_info(p, &pinfo);
if (ret != PFM_SUCCESS)
continue;
@@ -267,25 +258,14 @@ void print_libpfm_events(bool name_only, bool long_desc)
if (pinfo.pmu == PFM_PMU_PERF_EVENT)
continue;
- for (i = pinfo.first_event; i != -1;
- i = pfm_get_event_next(i)) {
-
+ for (int i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) {
ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT,
&info);
if (ret != PFM_SUCCESS)
continue;
- if (!name_only && !printed_pmu) {
- printf("%s:\n", pinfo.name);
- printed_pmu = true;
- }
-
- if (!name_only)
- print_libpfm_events_detailed(&info, long_desc);
- else
- print_libpfm_events_raw(&pinfo, &info);
+ print_libpfm_event(print_cb, print_state, &pinfo, &info, &storage);
}
- if (!name_only && printed_pmu)
- putchar('\n');
}
+ strbuf_release(&storage);
}
diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h
index 7d70dda87012..fb25c2749d26 100644
--- a/tools/perf/util/pfm.h
+++ b/tools/perf/util/pfm.h
@@ -7,13 +7,14 @@
#ifndef __PERF_PFM_H
#define __PERF_PFM_H
+#include "print-events.h"
#include <subcmd/parse-options.h>
#ifdef HAVE_LIBPFM
int parse_libpfm_events_option(const struct option *opt, const char *str,
int unset);
-void print_libpfm_events(bool name_only, bool long_desc);
+void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state);
#else
#include <linux/compiler.h>
@@ -26,8 +27,8 @@ static inline int parse_libpfm_events_option(
return 0;
}
-static inline void print_libpfm_events(bool name_only __maybe_unused,
- bool long_desc __maybe_unused)
+static inline void print_libpfm_events(const struct print_callbacks *print_cb __maybe_unused,
+ void *print_state __maybe_unused)
{
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 03284059175f..e9a4f31926bf 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -23,6 +23,7 @@
#include "evsel.h"
#include "pmu.h"
#include "parse-events.h"
+#include "print-events.h"
#include "header.h"
#include "string2.h"
#include "strbuf.h"
@@ -31,10 +32,26 @@
struct perf_pmu perf_pmu__fake;
+/**
+ * struct perf_pmu_format - Values from a format file read from
+ * <sysfs>/devices/cpu/format/ held in struct perf_pmu.
+ *
+ * For example, the contents of <sysfs>/devices/cpu/format/event may be
+ * "config:0-7" and will be represented here as name="event",
+ * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set.
+ */
struct perf_pmu_format {
+ /** @name: The modifier/file name. */
char *name;
+ /**
+ * @value : Which config value the format relates to. Supported values
+ * are from PERF_PMU_FORMAT_VALUE_CONFIG to
+ * PERF_PMU_FORMAT_VALUE_CONFIG_END.
+ */
int value;
+ /** @bits: Which config bits are set by this format value. */
DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+ /** @list: Element on list within struct perf_pmu. */
struct list_head list;
};
@@ -980,7 +997,6 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name)
pmu->is_uncore = pmu_is_uncore(name);
if (pmu->is_uncore)
pmu->id = pmu_id(name);
- pmu->is_hybrid = is_hybrid;
pmu->max_precise = pmu_max_precise(name);
pmu_add_cpu_aliases(&aliases, pmu);
pmu_add_sys_aliases(&aliases, pmu);
@@ -992,7 +1008,7 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name)
list_splice(&aliases, &pmu->aliases);
list_add_tail(&pmu->list, &pmus);
- if (pmu->is_hybrid)
+ if (is_hybrid)
list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus);
pmu->default_config = perf_pmu__get_default_config(pmu);
@@ -1065,11 +1081,15 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel)
{
struct perf_pmu *pmu = NULL;
+ if (evsel->pmu)
+ return evsel->pmu;
+
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
if (pmu->type == evsel->core.attr.type)
break;
}
+ evsel->pmu = pmu;
return pmu;
}
@@ -1534,8 +1554,8 @@ static int sub_non_neg(int a, int b)
return a - b;
}
-static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
- struct perf_pmu_alias *alias)
+static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
+ const struct perf_pmu_alias *alias)
{
struct parse_events_term *term;
int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
@@ -1560,72 +1580,60 @@ static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
return buf;
}
-static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
- struct perf_pmu_alias *alias)
-{
- snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name);
- return buf;
-}
-
+/** Struct for ordering events as output in perf list. */
struct sevent {
- char *name;
- char *desc;
- char *topic;
- char *str;
- char *pmu;
- char *metric_expr;
- char *metric_name;
- int is_cpu;
+ /** PMU for event. */
+ const struct perf_pmu *pmu;
+ /**
+ * Optional event for name, desc, etc. If not present then this is a
+ * selectable PMU and the event name is shown as "//".
+ */
+ const struct perf_pmu_alias *event;
+ /** Is the PMU for the CPU? */
+ bool is_cpu;
};
static int cmp_sevent(const void *a, const void *b)
{
const struct sevent *as = a;
const struct sevent *bs = b;
+ const char *a_pmu_name, *b_pmu_name;
+ const char *a_name = "//", *a_desc = NULL, *a_topic = "";
+ const char *b_name = "//", *b_desc = NULL, *b_topic = "";
int ret;
- /* Put extra events last */
- if (!!as->desc != !!bs->desc)
- return !!as->desc - !!bs->desc;
- if (as->topic && bs->topic) {
- int n = strcmp(as->topic, bs->topic);
-
- if (n)
- return n;
+ if (as->event) {
+ a_name = as->event->name;
+ a_desc = as->event->desc;
+ a_topic = as->event->topic ?: "";
}
-
- /* Order CPU core events to be first */
- if (as->is_cpu != bs->is_cpu)
- return bs->is_cpu - as->is_cpu;
-
- ret = strcmp(as->name, bs->name);
- if (!ret) {
- if (as->pmu && bs->pmu)
- return strcmp(as->pmu, bs->pmu);
+ if (bs->event) {
+ b_name = bs->event->name;
+ b_desc = bs->event->desc;
+ b_topic = bs->event->topic ?: "";
}
+ /* Put extra events last. */
+ if (!!a_desc != !!b_desc)
+ return !!a_desc - !!b_desc;
- return ret;
-}
+ /* Order by topics. */
+ ret = strcmp(a_topic, b_topic);
+ if (ret)
+ return ret;
-static void wordwrap(char *s, int start, int max, int corr)
-{
- int column = start;
- int n;
+ /* Order CPU core events to be first */
+ if (as->is_cpu != bs->is_cpu)
+ return as->is_cpu ? -1 : 1;
- while (*s) {
- int wlen = strcspn(s, " \t");
+ /* Order by PMU name. */
+ a_pmu_name = as->pmu->name ?: "";
+ b_pmu_name = bs->pmu->name ?: "";
+ ret = strcmp(a_pmu_name, b_pmu_name);
+ if (ret)
+ return ret;
- if (column + wlen >= max && column > start) {
- printf("\n%*s", start, "");
- column = start + corr;
- }
- n = printf("%s%.*s", column > start ? " " : "", wlen, s);
- if (n <= 0)
- break;
- s += wlen;
- column += n;
- s = skip_spaces(s);
- }
+ /* Order by event name. */
+ return strcmp(a_name, b_name);
}
bool is_pmu_core(const char *name)
@@ -1636,147 +1644,127 @@ bool is_pmu_core(const char *name)
static bool pmu_alias_is_duplicate(struct sevent *alias_a,
struct sevent *alias_b)
{
- /* Different names -> never duplicates */
- if (strcmp(alias_a->name, alias_b->name))
- return false;
+ const char *a_pmu_name, *b_pmu_name;
+ const char *a_name = alias_a->event ? alias_a->event->name : "//";
+ const char *b_name = alias_b->event ? alias_b->event->name : "//";
- /* Don't remove duplicates for hybrid PMUs */
- if (perf_pmu__is_hybrid(alias_a->pmu) &&
- perf_pmu__is_hybrid(alias_b->pmu))
+ /* Different names -> never duplicates */
+ if (strcmp(a_name, b_name))
return false;
- return true;
+ /* Don't remove duplicates for different PMUs */
+ a_pmu_name = alias_a->pmu->name ?: "";
+ b_pmu_name = alias_b->pmu->name ?: "";
+ return strcmp(a_pmu_name, b_pmu_name) == 0;
}
-void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
- bool long_desc, bool details_flag, bool deprecated,
- const char *pmu_name)
+void print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
{
struct perf_pmu *pmu;
- struct perf_pmu_alias *alias;
+ struct perf_pmu_alias *event;
char buf[1024];
int printed = 0;
int len, j;
struct sevent *aliases;
- int numdesc = 0;
- int columns = pager_get_columns();
- char *topic = NULL;
pmu = NULL;
len = 0;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- list_for_each_entry(alias, &pmu->aliases, list)
+ list_for_each_entry(event, &pmu->aliases, list)
len++;
if (pmu->selectable)
len++;
}
aliases = zalloc(sizeof(struct sevent) * len);
- if (!aliases)
- goto out_enomem;
+ if (!aliases) {
+ pr_err("FATAL: not enough memory to print PMU events\n");
+ return;
+ }
pmu = NULL;
j = 0;
while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- if (pmu_name && perf_pmu__is_hybrid(pmu->name) &&
- strcmp(pmu_name, pmu->name)) {
- continue;
- }
-
- list_for_each_entry(alias, &pmu->aliases, list) {
- char *name = alias->desc ? alias->name :
- format_alias(buf, sizeof(buf), pmu, alias);
- bool is_cpu = is_pmu_core(pmu->name) ||
- perf_pmu__is_hybrid(pmu->name);
+ bool is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name);
- if (alias->deprecated && !deprecated)
- continue;
-
- if (event_glob != NULL &&
- !(strglobmatch_nocase(name, event_glob) ||
- (!is_cpu && strglobmatch_nocase(alias->name,
- event_glob)) ||
- (alias->topic &&
- strglobmatch_nocase(alias->topic, event_glob))))
- continue;
-
- if (is_cpu && !name_only && !alias->desc)
- name = format_alias_or(buf, sizeof(buf), pmu, alias);
-
- aliases[j].name = name;
- if (is_cpu && !name_only && !alias->desc)
- aliases[j].name = format_alias_or(buf,
- sizeof(buf),
- pmu, alias);
- aliases[j].name = strdup(aliases[j].name);
- if (!aliases[j].name)
- goto out_enomem;
-
- aliases[j].desc = long_desc ? alias->long_desc :
- alias->desc;
- aliases[j].topic = alias->topic;
- aliases[j].str = alias->str;
- aliases[j].pmu = pmu->name;
- aliases[j].metric_expr = alias->metric_expr;
- aliases[j].metric_name = alias->metric_name;
+ list_for_each_entry(event, &pmu->aliases, list) {
+ aliases[j].event = event;
+ aliases[j].pmu = pmu;
aliases[j].is_cpu = is_cpu;
j++;
}
- if (pmu->selectable &&
- (event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
- char *s;
- if (asprintf(&s, "%s//", pmu->name) < 0)
- goto out_enomem;
- aliases[j].name = s;
+ if (pmu->selectable) {
+ aliases[j].event = NULL;
+ aliases[j].pmu = pmu;
+ aliases[j].is_cpu = is_cpu;
j++;
}
}
len = j;
qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
for (j = 0; j < len; j++) {
+ const char *name, *alias = NULL, *scale_unit = NULL,
+ *desc = NULL, *long_desc = NULL,
+ *encoding_desc = NULL, *topic = NULL,
+ *metric_name = NULL, *metric_expr = NULL;
+ bool deprecated = false;
+ size_t buf_used;
+
/* Skip duplicates */
if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
continue;
- if (name_only) {
- printf("%s ", aliases[j].name);
- continue;
- }
- if (aliases[j].desc && !quiet_flag) {
- if (numdesc++ == 0)
- printf("\n");
- if (aliases[j].topic && (!topic ||
- strcmp(topic, aliases[j].topic))) {
- printf("%s%s:\n", topic ? "\n" : "",
- aliases[j].topic);
- topic = aliases[j].topic;
+ if (!aliases[j].event) {
+ /* A selectable event. */
+ buf_used = snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name) + 1;
+ name = buf;
+ } else {
+ if (aliases[j].event->desc) {
+ name = aliases[j].event->name;
+ buf_used = 0;
+ } else {
+ name = format_alias(buf, sizeof(buf), aliases[j].pmu,
+ aliases[j].event);
+ if (aliases[j].is_cpu) {
+ alias = name;
+ name = aliases[j].event->name;
+ }
+ buf_used = strlen(buf) + 1;
}
- printf(" %-50s\n", aliases[j].name);
- printf("%*s", 8, "[");
- wordwrap(aliases[j].desc, 8, columns, 0);
- printf("]\n");
- if (details_flag) {
- printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str);
- if (aliases[j].metric_name)
- printf(" MetricName: %s", aliases[j].metric_name);
- if (aliases[j].metric_expr)
- printf(" MetricExpr: %s", aliases[j].metric_expr);
- putchar('\n');
+ if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) {
+ scale_unit = buf + buf_used;
+ buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+ "%G%s", aliases[j].event->scale,
+ aliases[j].event->unit) + 1;
}
- } else
- printf(" %-50s [Kernel PMU event]\n", aliases[j].name);
- printed++;
+ desc = aliases[j].event->desc;
+ long_desc = aliases[j].event->long_desc;
+ topic = aliases[j].event->topic;
+ encoding_desc = buf + buf_used;
+ buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+ "%s/%s/", aliases[j].pmu->name,
+ aliases[j].event->str) + 1;
+ metric_name = aliases[j].event->metric_name;
+ metric_expr = aliases[j].event->metric_expr;
+ deprecated = aliases[j].event->deprecated;
+ }
+ print_cb->print_event(print_state,
+ aliases[j].pmu->name,
+ topic,
+ name,
+ alias,
+ scale_unit,
+ deprecated,
+ "Kernel PMU event",
+ desc,
+ long_desc,
+ encoding_desc,
+ metric_name,
+ metric_expr);
}
if (printed && pager_in_use())
printf("\n");
-out_free:
- for (j = 0; j < len; j++)
- zfree(&aliases[j].name);
+
zfree(&aliases);
return;
-
-out_enomem:
- printf("FATAL: not enough memory to print PMU events\n");
- if (aliases)
- goto out_free;
}
bool pmu_have_event(const char *pname, const char *name)
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 68e15c38ae71..69ca0004f94f 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -12,6 +12,7 @@
struct evsel_config_term;
struct perf_cpu_map;
+struct print_callbacks;
enum {
PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -33,31 +34,101 @@ struct perf_pmu_caps {
struct list_head list;
};
+/**
+ * struct perf_pmu - hi
+ */
struct perf_pmu {
+ /** @name: The name of the PMU such as "cpu". */
char *name;
+ /**
+ * @alias_name: Optional alternate name for the PMU determined in
+ * architecture specific code.
+ */
char *alias_name;
+ /**
+ * @id: Optional PMU identifier read from
+ * <sysfs>/bus/event_source/devices/<name>/identifier.
+ */
char *id;
+ /**
+ * @type: Perf event attributed type value, read from
+ * <sysfs>/bus/event_source/devices/<name>/type.
+ */
__u32 type;
+ /**
+ * @selectable: Can the PMU name be selected as if it were an event?
+ */
bool selectable;
+ /**
+ * @is_uncore: Is the PMU not within the CPU core? Determined by the
+ * presence of <sysfs>/bus/event_source/devices/<name>/cpumask.
+ */
bool is_uncore;
- bool is_hybrid;
+ /**
+ * @auxtrace: Are events auxiliary events? Determined in architecture
+ * specific code.
+ */
bool auxtrace;
+ /**
+ * @max_precise: Number of levels of :ppp precision supported by the
+ * PMU, read from
+ * <sysfs>/bus/event_source/devices/<name>/caps/max_precise.
+ */
int max_precise;
+ /**
+ * @default_config: Optional default perf_event_attr determined in
+ * architecture specific code.
+ */
struct perf_event_attr *default_config;
+ /**
+ * @cpus: Empty or the contents of either of:
+ * <sysfs>/bus/event_source/devices/<name>/cpumask.
+ * <sysfs>/bus/event_source/devices/<cpu>/cpus.
+ */
struct perf_cpu_map *cpus;
- struct list_head format; /* HEAD struct perf_pmu_format -> list */
- struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+ /**
+ * @format: Holds the contents of files read from
+ * <sysfs>/bus/event_source/devices/<name>/format/. The contents specify
+ * which event parameter changes what config, config1 or config2 bits.
+ */
+ struct list_head format;
+ /**
+ * @aliases: List of struct perf_pmu_alias. Each alias corresponds to an
+ * event read from <sysfs>/bus/event_source/devices/<name>/events/ or
+ * from json events in pmu-events.c.
+ */
+ struct list_head aliases;
+ /** @caps_initialized: Has the list caps been initialized? */
bool caps_initialized;
+ /** @nr_caps: The length of the list caps. */
u32 nr_caps;
- struct list_head caps; /* HEAD struct perf_pmu_caps -> list */
- struct list_head list; /* ELEM */
+ /**
+ * @caps: Holds the contents of files read from
+ * <sysfs>/bus/event_source/devices/<name>/caps/.
+ *
+ * The contents are pairs of the filename with the value of its
+ * contents, for example, max_precise (see above) may have a value of 3.
+ */
+ struct list_head caps;
+ /** @list: Element on pmus list in pmu.c. */
+ struct list_head list;
+ /** @hybrid_list: Element on perf_pmu__hybrid_pmus. */
struct list_head hybrid_list;
+ /**
+ * @missing_features: Features to inhibit when events on this PMU are
+ * opened.
+ */
struct {
+ /**
+ * @exclude_guest: Disables perf_event_attr exclude_guest and
+ * exclude_host.
+ */
bool exclude_guest;
} missing_features;
};
+/** @perf_pmu__fake: A special global PMU used for testing. */
extern struct perf_pmu perf_pmu__fake;
struct perf_pmu_info {
@@ -71,21 +142,60 @@ struct perf_pmu_info {
#define UNIT_MAX_LEN 31 /* max length for event unit name */
+/**
+ * struct perf_pmu_alias - An event either read from sysfs or builtin in
+ * pmu-events.c, created by parsing the pmu-events json files.
+ */
struct perf_pmu_alias {
+ /** @name: Name of the event like "mem-loads". */
char *name;
+ /** @desc: Optional short description of the event. */
char *desc;
+ /** @long_desc: Optional long description. */
char *long_desc;
+ /**
+ * @topic: Optional topic such as cache or pipeline, particularly for
+ * json events.
+ */
char *topic;
+ /**
+ * @str: Comma separated parameter list like
+ * "event=0xcd,umask=0x1,ldlat=0x3".
+ */
char *str;
- struct list_head terms; /* HEAD struct parse_events_term -> list */
- struct list_head list; /* ELEM */
+ /** @terms: Owned list of the original parsed parameters. */
+ struct list_head terms;
+ /** @list: List element of struct perf_pmu aliases. */
+ struct list_head list;
+ /** @unit: Units for the event, such as bytes or cache lines. */
char unit[UNIT_MAX_LEN+1];
+ /** @scale: Value to scale read counter values by. */
double scale;
+ /**
+ * @per_pkg: Does the file
+ * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or
+ * equivalent json value exist and have the value 1.
+ */
bool per_pkg;
+ /**
+ * @snapshot: Does the file
+ * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot
+ * exist and have the value 1.
+ */
bool snapshot;
+ /**
+ * @deprecated: Is the event hidden and so not shown in perf list by
+ * default.
+ */
bool deprecated;
+ /**
+ * @metric_expr: A metric expression associated with an event. Doing
+ * this makes little sense due to scale and unit applying to both.
+ */
char *metric_expr;
+ /** @metric_name: A name for the metric. unit applying to both. */
char *metric_name;
+ /** @pmu_name: The name copied from struct perf_pmu. */
char *pmu_name;
};
@@ -116,9 +226,7 @@ void perf_pmu__del_formats(struct list_head *formats);
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
bool is_pmu_core(const char *name);
-void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
- bool long_desc, bool details_flag,
- bool deprecated, const char *pmu_name);
+void print_pmu_events(const struct print_callbacks *print_cb, void *print_state);
bool pmu_have_event(const char *pname, const char *name);
int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index c4d5d87fae2f..2646ae18d9f9 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -28,6 +28,7 @@
#define MAX_NAME_LEN 100
+/** Strings corresponding to enum perf_type_id. */
static const char * const event_type_descriptors[] = {
"Hardware event",
"Software event",
@@ -52,125 +53,77 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
},
};
-static int cmp_string(const void *a, const void *b)
-{
- const char * const *as = a;
- const char * const *bs = b;
-
- return strcmp(*as, *bs);
-}
-
/*
* Print the events from <debugfs_mount_point>/tracing/events
*/
-void print_tracepoint_events(const char *subsys_glob,
- const char *event_glob, bool name_only)
+void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state)
{
- DIR *sys_dir, *evt_dir;
- struct dirent *sys_dirent, *evt_dirent;
- char evt_path[MAXPATHLEN];
- char *dir_path;
- char **evt_list = NULL;
- unsigned int evt_i = 0, evt_num = 0;
- bool evt_num_known = false;
-
-restart:
- sys_dir = tracing_events__opendir();
- if (!sys_dir)
- return;
-
- if (evt_num_known) {
- evt_list = zalloc(sizeof(char *) * evt_num);
- if (!evt_list)
- goto out_close_sys_dir;
- }
-
- for_each_subsystem(sys_dir, sys_dirent) {
- if (subsys_glob != NULL &&
- !strglobmatch(sys_dirent->d_name, subsys_glob))
+ struct dirent **sys_namelist = NULL;
+ int sys_items = tracing_events__scandir_alphasort(&sys_namelist);
+
+ for (int i = 0; i < sys_items; i++) {
+ struct dirent *sys_dirent = sys_namelist[i];
+ struct dirent **evt_namelist = NULL;
+ char *dir_path;
+ int evt_items;
+
+ if (sys_dirent->d_type != DT_DIR ||
+ !strcmp(sys_dirent->d_name, ".") ||
+ !strcmp(sys_dirent->d_name, ".."))
continue;
dir_path = get_events_file(sys_dirent->d_name);
if (!dir_path)
continue;
- evt_dir = opendir(dir_path);
- if (!evt_dir)
- goto next;
- for_each_event(dir_path, evt_dir, evt_dirent) {
- if (event_glob != NULL &&
- !strglobmatch(evt_dirent->d_name, event_glob))
+ evt_items = scandir(dir_path, &evt_namelist, NULL, alphasort);
+ for (int j = 0; j < evt_items; j++) {
+ struct dirent *evt_dirent = evt_namelist[j];
+ char evt_path[MAXPATHLEN];
+
+ if (evt_dirent->d_type != DT_DIR ||
+ !strcmp(evt_dirent->d_name, ".") ||
+ !strcmp(evt_dirent->d_name, ".."))
continue;
- if (!evt_num_known) {
- evt_num++;
+ if (tp_event_has_id(dir_path, evt_dirent) != 0)
continue;
- }
snprintf(evt_path, MAXPATHLEN, "%s:%s",
sys_dirent->d_name, evt_dirent->d_name);
-
- evt_list[evt_i] = strdup(evt_path);
- if (evt_list[evt_i] == NULL) {
- put_events_file(dir_path);
- goto out_close_evt_dir;
- }
- evt_i++;
- }
- closedir(evt_dir);
-next:
- put_events_file(dir_path);
- }
- closedir(sys_dir);
-
- if (!evt_num_known) {
- evt_num_known = true;
- goto restart;
- }
- qsort(evt_list, evt_num, sizeof(char *), cmp_string);
- evt_i = 0;
- while (evt_i < evt_num) {
- if (name_only) {
- printf("%s ", evt_list[evt_i++]);
- continue;
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ evt_path,
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ "Tracepoint event",
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
}
- printf(" %-50s [%s]\n", evt_list[evt_i++],
- event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+ free(dir_path);
+ free(evt_namelist);
}
- if (evt_num && pager_in_use())
- printf("\n");
-
-out_free:
- evt_num = evt_i;
- for (evt_i = 0; evt_i < evt_num; evt_i++)
- zfree(&evt_list[evt_i]);
- zfree(&evt_list);
- return;
-
-out_close_evt_dir:
- closedir(evt_dir);
-out_close_sys_dir:
- closedir(sys_dir);
-
- printf("FATAL: not enough memory to print %s\n",
- event_type_descriptors[PERF_TYPE_TRACEPOINT]);
- if (evt_list)
- goto out_free;
+ free(sys_namelist);
}
-void print_sdt_events(const char *subsys_glob, const char *event_glob,
- bool name_only)
+void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
{
- struct probe_cache *pcache;
- struct probe_cache_entry *ent;
struct strlist *bidlist, *sdtlist;
- struct strlist_config cfg = {.dont_dupstr = true};
- struct str_node *nd, *nd2;
- char *buf, *path, *ptr = NULL;
- bool show_detail = false;
- int ret;
-
- sdtlist = strlist__new(NULL, &cfg);
+ struct str_node *bid_nd, *sdt_name, *next_sdt_name;
+ const char *last_sdt_name = NULL;
+
+ /*
+ * The implicitly sorted sdtlist will hold the tracepoint name followed
+ * by @<buildid>. If the tracepoint name is unique (determined by
+ * looking at the adjacent nodes) the @<buildid> is dropped otherwise
+ * the executable path and buildid are added to the name.
+ */
+ sdtlist = strlist__new(NULL, NULL);
if (!sdtlist) {
pr_debug("Failed to allocate new strlist for SDT\n");
return;
@@ -180,354 +133,274 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob,
pr_debug("Failed to get buildids: %d\n", errno);
return;
}
- strlist__for_each_entry(nd, bidlist) {
- pcache = probe_cache__new(nd->s, NULL);
+ strlist__for_each_entry(bid_nd, bidlist) {
+ struct probe_cache *pcache;
+ struct probe_cache_entry *ent;
+
+ pcache = probe_cache__new(bid_nd->s, NULL);
if (!pcache)
continue;
list_for_each_entry(ent, &pcache->entries, node) {
- if (!ent->sdt)
- continue;
- if (subsys_glob &&
- !strglobmatch(ent->pev.group, subsys_glob))
- continue;
- if (event_glob &&
- !strglobmatch(ent->pev.event, event_glob))
- continue;
- ret = asprintf(&buf, "%s:%s@%s", ent->pev.group,
- ent->pev.event, nd->s);
- if (ret > 0)
- strlist__add(sdtlist, buf);
+ char buf[1024];
+
+ snprintf(buf, sizeof(buf), "%s:%s@%s",
+ ent->pev.group, ent->pev.event, bid_nd->s);
+ strlist__add(sdtlist, buf);
}
probe_cache__delete(pcache);
}
strlist__delete(bidlist);
- strlist__for_each_entry(nd, sdtlist) {
- buf = strchr(nd->s, '@');
- if (buf)
- *(buf++) = '\0';
- if (name_only) {
- printf("%s ", nd->s);
- continue;
- }
- nd2 = strlist__next(nd);
- if (nd2) {
- ptr = strchr(nd2->s, '@');
- if (ptr)
- *ptr = '\0';
- if (strcmp(nd->s, nd2->s) == 0)
- show_detail = true;
+ strlist__for_each_entry(sdt_name, sdtlist) {
+ bool show_detail = false;
+ char *bid = strchr(sdt_name->s, '@');
+ char *evt_name = NULL;
+
+ if (bid)
+ *(bid++) = '\0';
+
+ if (last_sdt_name && !strcmp(last_sdt_name, sdt_name->s)) {
+ show_detail = true;
+ } else {
+ next_sdt_name = strlist__next(sdt_name);
+ if (next_sdt_name) {
+ char *bid2 = strchr(next_sdt_name->s, '@');
+
+ if (bid2)
+ *bid2 = '\0';
+ if (strcmp(sdt_name->s, next_sdt_name->s) == 0)
+ show_detail = true;
+ if (bid2)
+ *bid2 = '@';
+ }
}
+ last_sdt_name = sdt_name->s;
+
if (show_detail) {
- path = build_id_cache__origname(buf);
- ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf);
- if (ret > 0) {
- printf(" %-50s [%s]\n", buf, "SDT event");
- free(buf);
+ char *path = build_id_cache__origname(bid);
+
+ if (path) {
+ if (asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid) < 0)
+ evt_name = NULL;
+ free(path);
}
- free(path);
- } else
- printf(" %-50s [%s]\n", nd->s, "SDT event");
- if (nd2) {
- if (strcmp(nd->s, nd2->s) != 0)
- show_detail = false;
- if (ptr)
- *ptr = '@';
}
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ evt_name ?: sdt_name->s,
+ /*event_alias=*/NULL,
+ /*deprecated=*/false,
+ /*scale_unit=*/NULL,
+ "SDT event",
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
+
+ free(evt_name);
}
strlist__delete(sdtlist);
}
-int print_hwcache_events(const char *event_glob, bool name_only)
+int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state)
{
- unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0;
- char name[64], new_name[128];
- char **evt_list = NULL, **evt_pmus = NULL;
- bool evt_num_known = false;
- struct perf_pmu *pmu = NULL;
-
- if (perf_pmu__has_hybrid()) {
- npmus = perf_pmu__hybrid_pmu_num();
- evt_pmus = zalloc(sizeof(char *) * npmus);
- if (!evt_pmus)
- goto out_enomem;
- }
+ struct strlist *evt_name_list = strlist__new(NULL, NULL);
+ struct str_node *nd;
-restart:
- if (evt_num_known) {
- evt_list = zalloc(sizeof(char *) * evt_num);
- if (!evt_list)
- goto out_enomem;
+ if (!evt_name_list) {
+ pr_debug("Failed to allocate new strlist for hwcache events\n");
+ return -ENOMEM;
}
-
- for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
- for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
if (!evsel__is_cache_op_valid(type, op))
continue;
- for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- unsigned int hybrid_supported = 0, j;
- bool supported;
+ for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+ struct perf_pmu *pmu = NULL;
+ char name[64];
__evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
- if (event_glob != NULL && !strglobmatch(name, event_glob))
- continue;
-
if (!perf_pmu__has_hybrid()) {
- if (!is_event_supported(PERF_TYPE_HW_CACHE,
- type | (op << 8) | (i << 16))) {
- continue;
- }
- } else {
- perf_pmu__for_each_hybrid_pmu(pmu) {
- if (!evt_num_known) {
- evt_num++;
- continue;
- }
-
- supported = is_event_supported(
- PERF_TYPE_HW_CACHE,
- type | (op << 8) | (i << 16) |
- ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT));
- if (supported) {
- snprintf(new_name, sizeof(new_name),
- "%s/%s/", pmu->name, name);
- evt_pmus[hybrid_supported] =
- strdup(new_name);
- hybrid_supported++;
- }
- }
-
- if (hybrid_supported == 0)
- continue;
- }
-
- if (!evt_num_known) {
- evt_num++;
+ if (is_event_supported(PERF_TYPE_HW_CACHE,
+ type | (op << 8) | (i << 16)))
+ strlist__add(evt_name_list, name);
continue;
}
-
- if ((hybrid_supported == 0) ||
- (hybrid_supported == npmus)) {
- evt_list[evt_i] = strdup(name);
- if (npmus > 0) {
- for (j = 0; j < npmus; j++)
- zfree(&evt_pmus[j]);
- }
- } else {
- for (j = 0; j < hybrid_supported; j++) {
- evt_list[evt_i++] = evt_pmus[j];
- evt_pmus[j] = NULL;
+ perf_pmu__for_each_hybrid_pmu(pmu) {
+ if (is_event_supported(PERF_TYPE_HW_CACHE,
+ type | (op << 8) | (i << 16) |
+ ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) {
+ char new_name[128];
+ snprintf(new_name, sizeof(new_name),
+ "%s/%s/", pmu->name, name);
+ strlist__add(evt_name_list, new_name);
}
- continue;
}
-
- if (evt_list[evt_i] == NULL)
- goto out_enomem;
- evt_i++;
}
}
}
- if (!evt_num_known) {
- evt_num_known = true;
- goto restart;
- }
-
- for (evt_i = 0; evt_i < evt_num; evt_i++) {
- if (!evt_list[evt_i])
- break;
+ strlist__for_each_entry(nd, evt_name_list) {
+ print_cb->print_event(print_state,
+ "cache",
+ /*pmu_name=*/NULL,
+ nd->s,
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[PERF_TYPE_HW_CACHE],
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
}
-
- evt_num = evt_i;
- qsort(evt_list, evt_num, sizeof(char *), cmp_string);
- evt_i = 0;
- while (evt_i < evt_num) {
- if (name_only) {
- printf("%s ", evt_list[evt_i++]);
- continue;
- }
- printf(" %-50s [%s]\n", evt_list[evt_i++],
- event_type_descriptors[PERF_TYPE_HW_CACHE]);
- }
- if (evt_num && pager_in_use())
- printf("\n");
-
-out_free:
- evt_num = evt_i;
- for (evt_i = 0; evt_i < evt_num; evt_i++)
- zfree(&evt_list[evt_i]);
- zfree(&evt_list);
-
- for (evt_i = 0; evt_i < npmus; evt_i++)
- zfree(&evt_pmus[evt_i]);
- zfree(&evt_pmus);
- return evt_num;
-
-out_enomem:
- printf("FATAL: not enough memory to print %s\n",
- event_type_descriptors[PERF_TYPE_HW_CACHE]);
- if (evt_list)
- goto out_free;
- return evt_num;
+ strlist__delete(evt_name_list);
+ return 0;
}
-static void print_tool_event(const struct event_symbol *syms, const char *event_glob,
- bool name_only)
+void print_tool_events(const struct print_callbacks *print_cb, void *print_state)
{
- if (syms->symbol == NULL)
- return;
-
- if (event_glob && !(strglobmatch(syms->symbol, event_glob) ||
- (syms->alias && strglobmatch(syms->alias, event_glob))))
- return;
-
- if (name_only)
- printf("%s ", syms->symbol);
- else {
- char name[MAX_NAME_LEN];
-
- if (syms->alias && strlen(syms->alias))
- snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
- else
- strlcpy(name, syms->symbol, MAX_NAME_LEN);
- printf(" %-50s [%s]\n", name, "Tool event");
+ // Start at 1 because the first enum entry means no tool event.
+ for (int i = 1; i < PERF_TOOL_MAX; ++i) {
+ print_cb->print_event(print_state,
+ "tool",
+ /*pmu_name=*/NULL,
+ event_symbols_tool[i].symbol,
+ event_symbols_tool[i].alias,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ "Tool event",
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
}
}
-void print_tool_events(const char *event_glob, bool name_only)
+void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
+ unsigned int type, const struct event_symbol *syms,
+ unsigned int max)
{
- // Start at 1 because the first enum entry means no tool event.
- for (int i = 1; i < PERF_TOOL_MAX; ++i)
- print_tool_event(event_symbols_tool + i, event_glob, name_only);
-
- if (pager_in_use())
- printf("\n");
-}
+ struct strlist *evt_name_list = strlist__new(NULL, NULL);
+ struct str_node *nd;
-void print_symbol_events(const char *event_glob, unsigned int type,
- struct event_symbol *syms, unsigned int max,
- bool name_only)
-{
- unsigned int i, evt_i = 0, evt_num = 0;
- char name[MAX_NAME_LEN];
- char **evt_list = NULL;
- bool evt_num_known = false;
-
-restart:
- if (evt_num_known) {
- evt_list = zalloc(sizeof(char *) * evt_num);
- if (!evt_list)
- goto out_enomem;
- syms -= max;
+ if (!evt_name_list) {
+ pr_debug("Failed to allocate new strlist for symbol events\n");
+ return;
}
-
- for (i = 0; i < max; i++, syms++) {
+ for (unsigned int i = 0; i < max; i++) {
/*
* New attr.config still not supported here, the latest
* example was PERF_COUNT_SW_CGROUP_SWITCHES
*/
- if (syms->symbol == NULL)
- continue;
-
- if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
- (syms->alias && strglobmatch(syms->alias, event_glob))))
+ if (syms[i].symbol == NULL)
continue;
if (!is_event_supported(type, i))
continue;
- if (!evt_num_known) {
- evt_num++;
- continue;
- }
-
- if (!name_only && strlen(syms->alias))
- snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
- else
- strlcpy(name, syms->symbol, MAX_NAME_LEN);
+ if (strlen(syms[i].alias)) {
+ char name[MAX_NAME_LEN];
- evt_list[evt_i] = strdup(name);
- if (evt_list[evt_i] == NULL)
- goto out_enomem;
- evt_i++;
+ snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias);
+ strlist__add(evt_name_list, name);
+ } else
+ strlist__add(evt_name_list, syms[i].symbol);
}
- if (!evt_num_known) {
- evt_num_known = true;
- goto restart;
- }
- qsort(evt_list, evt_num, sizeof(char *), cmp_string);
- evt_i = 0;
- while (evt_i < evt_num) {
- if (name_only) {
- printf("%s ", evt_list[evt_i++]);
- continue;
+ strlist__for_each_entry(nd, evt_name_list) {
+ char *alias = strstr(nd->s, " OR ");
+
+ if (alias) {
+ *alias = '\0';
+ alias += 4;
}
- printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]);
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ nd->s,
+ alias,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[type],
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
}
- if (evt_num && pager_in_use())
- printf("\n");
-
-out_free:
- evt_num = evt_i;
- for (evt_i = 0; evt_i < evt_num; evt_i++)
- zfree(&evt_list[evt_i]);
- zfree(&evt_list);
- return;
-
-out_enomem:
- printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]);
- if (evt_list)
- goto out_free;
+ strlist__delete(evt_name_list);
}
/*
* Print the help text for the event symbols:
*/
-void print_events(const char *event_glob, bool name_only, bool quiet_flag,
- bool long_desc, bool details_flag, bool deprecated,
- const char *pmu_name)
+void print_events(const struct print_callbacks *print_cb, void *print_state)
{
- print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
-
- print_symbol_events(event_glob, PERF_TYPE_SOFTWARE,
- event_symbols_sw, PERF_COUNT_SW_MAX, name_only);
- print_tool_events(event_glob, name_only);
-
- print_hwcache_events(event_glob, name_only);
-
- print_pmu_events(event_glob, name_only, quiet_flag, long_desc,
- details_flag, deprecated, pmu_name);
-
- if (event_glob != NULL)
- return;
-
- if (!name_only) {
- printf(" %-50s [%s]\n",
- "rNNN",
- event_type_descriptors[PERF_TYPE_RAW]);
- printf(" %-50s [%s]\n",
- "cpu/t1=v1[,t2=v2,t3 ...]/modifier",
- event_type_descriptors[PERF_TYPE_RAW]);
- if (pager_in_use())
- printf(" (see 'man perf-list' on how to encode it)\n\n");
-
- printf(" %-50s [%s]\n",
- "mem:<addr>[/len][:access]",
- event_type_descriptors[PERF_TYPE_BREAKPOINT]);
- if (pager_in_use())
- printf("\n");
- }
-
- print_tracepoint_events(NULL, NULL, name_only);
-
- print_sdt_events(NULL, NULL, name_only);
-
- metricgroup__print(true, true, NULL, name_only, details_flag,
- pmu_name);
-
- print_libpfm_events(name_only, long_desc);
+ print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE,
+ event_symbols_hw, PERF_COUNT_HW_MAX);
+ print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE,
+ event_symbols_sw, PERF_COUNT_SW_MAX);
+
+ print_tool_events(print_cb, print_state);
+
+ print_hwcache_events(print_cb, print_state);
+
+ print_pmu_events(print_cb, print_state);
+
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ "rNNN",
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[PERF_TYPE_RAW],
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
+
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ "cpu/t1=v1[,t2=v2,t3 ...]/modifier",
+ /*event_alias=*/NULL,
+ /*scale_unit=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[PERF_TYPE_RAW],
+ "(see 'man perf-list' on how to encode it)",
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
+
+ print_cb->print_event(print_state,
+ /*topic=*/NULL,
+ /*pmu_name=*/NULL,
+ "mem:<addr>[/len][:access]",
+ /*scale_unit=*/NULL,
+ /*event_alias=*/NULL,
+ /*deprecated=*/false,
+ event_type_descriptors[PERF_TYPE_BREAKPOINT],
+ /*desc=*/NULL,
+ /*long_desc=*/NULL,
+ /*encoding_desc=*/NULL,
+ /*metric_name=*/NULL,
+ /*metric_expr=*/NULL);
+
+ print_tracepoint_events(print_cb, print_state);
+
+ print_sdt_events(print_cb, print_state);
+
+ metricgroup__print(print_cb, print_state);
+
+ print_libpfm_events(print_cb, print_state);
}
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index 1da9910d83a6..c237e53c4487 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -2,21 +2,39 @@
#ifndef __PERF_PRINT_EVENTS_H
#define __PERF_PRINT_EVENTS_H
+#include <linux/perf_event.h>
#include <stdbool.h>
struct event_symbol;
-void print_events(const char *event_glob, bool name_only, bool quiet_flag,
- bool long_desc, bool details_flag, bool deprecated,
- const char *pmu_name);
-int print_hwcache_events(const char *event_glob, bool name_only);
-void print_sdt_events(const char *subsys_glob, const char *event_glob,
- bool name_only);
-void print_symbol_events(const char *event_glob, unsigned int type,
- struct event_symbol *syms, unsigned int max,
- bool name_only);
-void print_tool_events(const char *event_glob, bool name_only);
-void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
- bool name_only);
+struct print_callbacks {
+ void (*print_start)(void *print_state);
+ void (*print_end)(void *print_state);
+ void (*print_event)(void *print_state, const char *topic,
+ const char *pmu_name,
+ const char *event_name, const char *event_alias,
+ const char *scale_unit,
+ bool deprecated, const char *event_type_desc,
+ const char *desc, const char *long_desc,
+ const char *encoding_desc,
+ const char *metric_name, const char *metric_expr);
+ void (*print_metric)(void *print_state,
+ const char *group,
+ const char *name,
+ const char *desc,
+ const char *long_desc,
+ const char *expr,
+ const char *unit);
+};
+
+/** Print all events, the default when no options are specified. */
+void print_events(const struct print_callbacks *print_cb, void *print_state);
+int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state);
+void print_sdt_events(const struct print_callbacks *print_cb, void *print_state);
+void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
+ unsigned int type, const struct event_symbol *syms,
+ unsigned int max);
+void print_tool_events(const struct print_callbacks *print_cb, void *print_state);
+void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state);
#endif /* __PERF_PRINT_EVENTS_H */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 50d861a80f57..54b49ce85c9f 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -763,7 +763,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
/* Skip if declared file name does not match */
if (fsp->file) {
- file = dwarf_decl_file(fn_die);
+ file = die_get_decl_file(fn_die);
if (!file || strcmp(fsp->file, file) != 0)
return 0;
}
@@ -1063,6 +1063,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
struct dwarf_callback_param *param = data;
struct probe_finder *pf = param->data;
struct perf_probe_point *pp = &pf->pev->point;
+ const char *fname;
/* Check tag and diename */
if (!die_is_func_def(sp_die) ||
@@ -1070,12 +1071,17 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
return DWARF_CB_OK;
/* Check declared file */
- if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
+ fname = die_get_decl_file(sp_die);
+ if (!fname) {
+ pr_warning("A function DIE doesn't have decl_line. Maybe broken DWARF?\n");
+ return DWARF_CB_OK;
+ }
+ if (pp->file && fname && strtailcmp(pp->file, fname))
return DWARF_CB_OK;
pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
(unsigned long)dwarf_dieoffset(sp_die));
- pf->fname = dwarf_decl_file(sp_die);
+ pf->fname = fname;
if (pp->line) { /* Function relative line */
dwarf_decl_line(sp_die, &pf->lno);
pf->lno += pp->line;
@@ -1134,6 +1140,7 @@ struct pubname_callback_param {
static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
{
struct pubname_callback_param *param = data;
+ const char *fname;
if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) {
if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
@@ -1143,9 +1150,11 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
return DWARF_CB_OK;
- if (param->file &&
- strtailcmp(param->file, dwarf_decl_file(param->sp_die)))
- return DWARF_CB_OK;
+ if (param->file) {
+ fname = die_get_decl_file(param->sp_die);
+ if (!fname || strtailcmp(param->file, fname))
+ return DWARF_CB_OK;
+ }
param->found = 1;
return DWARF_CB_ABORT;
@@ -1741,7 +1750,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
goto post;
}
- fname = dwarf_decl_file(&spdie);
+ fname = die_get_decl_file(&spdie);
if (addr == baseaddr) {
/* Function entry - Relative line number is 0 */
lineno = baseline;
@@ -1778,8 +1787,8 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
}
}
/* Verify the lineno and baseline are in a same file */
- tmp = dwarf_decl_file(&spdie);
- if (!tmp || strcmp(tmp, fname) != 0)
+ tmp = die_get_decl_file(&spdie);
+ if (!tmp || (fname && strcmp(tmp, fname) != 0))
lineno = 0;
}
@@ -1889,13 +1898,17 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
struct dwarf_callback_param *param = data;
struct line_finder *lf = param->data;
struct line_range *lr = lf->lr;
+ const char *fname;
/* Check declared file */
- if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
- return DWARF_CB_OK;
+ if (lr->file) {
+ fname = die_get_decl_file(sp_die);
+ if (!fname || strtailcmp(lr->file, fname))
+ return DWARF_CB_OK;
+ }
if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) {
- lf->fname = dwarf_decl_file(sp_die);
+ lf->fname = die_get_decl_file(sp_die);
dwarf_decl_line(sp_die, &lr->offset);
pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
lf->lno_s = lr->offset + lr->start;
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 5be5fa2391de..b5941c74a0d6 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -718,17 +718,17 @@ static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
{
struct pyrf_thread_map *pthreads = (void *)obj;
- return pthreads->threads->nr;
+ return perf_thread_map__nr(pthreads->threads);
}
static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
{
struct pyrf_thread_map *pthreads = (void *)obj;
- if (i >= pthreads->threads->nr)
+ if (i >= perf_thread_map__nr(pthreads->threads))
return NULL;
- return Py_BuildValue("i", pthreads->threads->map[i]);
+ return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i));
}
static PySequenceMethods pyrf_thread_map__sequence_methods = {
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index f3fdad28a852..6fe478b0b61b 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -163,6 +163,7 @@
#include "s390-cpumsf-kernel.h"
#include "s390-cpumcf-kernel.h"
#include "config.h"
+#include "util/sample.h"
struct s390_cpumsf {
struct auxtrace auxtrace;
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
index 9a631d97471c..c10b891dbad6 100644
--- a/tools/perf/util/s390-sample-raw.c
+++ b/tools/perf/util/s390-sample-raw.c
@@ -28,6 +28,7 @@
#include "sample-raw.h"
#include "s390-cpumcf-kernel.h"
#include "pmu-events/pmu-events.h"
+#include "util/sample.h"
static size_t ctrset_size(struct cf_ctrset_entry *set)
{
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
new file mode 100644
index 000000000000..60ec79d4eea4
--- /dev/null
+++ b/tools/perf/util/sample.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SAMPLE_H
+#define __PERF_SAMPLE_H
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/* number of register is bound by the number of bits in regs_dump::mask (64) */
+#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
+
+struct regs_dump {
+ u64 abi;
+ u64 mask;
+ u64 *regs;
+
+ /* Cached values/mask filled by first register access. */
+ u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
+ u64 cache_mask;
+};
+
+struct stack_dump {
+ u16 offset;
+ u64 size;
+ char *data;
+};
+
+struct sample_read_value {
+ u64 value;
+ u64 id; /* only if PERF_FORMAT_ID */
+ u64 lost; /* only if PERF_FORMAT_LOST */
+};
+
+struct sample_read {
+ u64 time_enabled;
+ u64 time_running;
+ union {
+ struct {
+ u64 nr;
+ struct sample_read_value *values;
+ } group;
+ struct sample_read_value one;
+ };
+};
+
+static inline size_t sample_read_value_size(u64 read_format)
+{
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_LOST)
+ return sizeof(struct sample_read_value);
+ else
+ return offsetof(struct sample_read_value, lost);
+}
+
+static inline struct sample_read_value *next_sample_read_value(struct sample_read_value *v, u64 read_format)
+{
+ return (void *)v + sample_read_value_size(read_format);
+}
+
+#define sample_read_group__for_each(v, nr, rf) \
+ for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++)
+
+#define MAX_INSN 16
+
+struct aux_sample {
+ u64 size;
+ void *data;
+};
+
+struct perf_sample {
+ u64 ip;
+ u32 pid, tid;
+ u64 time;
+ u64 addr;
+ u64 id;
+ u64 stream_id;
+ u64 period;
+ u64 weight;
+ u64 transaction;
+ u64 insn_cnt;
+ u64 cyc_cnt;
+ u32 cpu;
+ u32 raw_size;
+ u64 data_src;
+ u64 phys_addr;
+ u64 data_page_size;
+ u64 code_page_size;
+ u64 cgroup;
+ u32 flags;
+ u32 machine_pid;
+ u32 vcpu;
+ u16 insn_len;
+ u8 cpumode;
+ u16 misc;
+ u16 ins_lat;
+ u16 p_stage_cyc;
+ bool no_hw_idx; /* No hw_idx collected in branch_stack */
+ char insn[MAX_INSN];
+ void *raw_data;
+ struct ip_callchain *callchain;
+ struct branch_stack *branch_stack;
+ struct regs_dump user_regs;
+ struct regs_dump intr_regs;
+ struct stack_dump user_stack;
+ struct sample_read read;
+ struct aux_sample aux_sample;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+ return sample->raw_data - 4;
+}
+
+#endif /* __PERF_SAMPLE_H */
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 1f2040f36d4e..1cf65db8f861 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -52,6 +52,7 @@
#include "print_binary.h"
#include "stat.h"
#include "mem-events.h"
+#include "util/perf_regs.h"
#if PY_MAJOR_VERSION < 3
#define _PyUnicode_FromString(arg) \
@@ -1653,13 +1654,7 @@ static void python_process_stat(struct perf_stat_config *config,
struct perf_cpu_map *cpus = counter->core.cpus;
int cpu, thread;
- if (config->aggr_mode == AGGR_GLOBAL) {
- process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp,
- &counter->counts->aggr);
- return;
- }
-
- for (thread = 0; thread < threads->nr; thread++) {
+ for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
perf_thread_map__pid(threads, thread), tstamp,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1a4f10de29ff..0e1a3d6bacb9 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
+#include <signal.h>
#include <inttypes.h>
#include <linux/err.h>
#include <linux/kernel.h>
@@ -2022,7 +2023,7 @@ static int perf_session__flush_thread_stacks(struct perf_session *session)
NULL);
}
-volatile int session_done;
+volatile sig_atomic_t session_done;
static int __perf_session__process_decomp_events(struct perf_session *session);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 5b1e6468d5e8..43e7ca40b2ec 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -77,7 +77,8 @@ perf = Extension('perf',
include_dirs = ['util/include'],
libraries = extra_libraries,
extra_compile_args = cflags,
- extra_objects = [libtraceevent, libapikfs, libperf],
+ extra_objects = [ x for x in [libtraceevent, libapikfs, libperf]
+ if x is not None],
)
setup(name='perf',
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index ba66bb7fc1ca..847acdb5dc40 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -25,41 +25,124 @@
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
-static void print_running(struct perf_stat_config *config,
- u64 run, u64 ena)
+#define METRIC_LEN 38
+#define EVNAME_LEN 32
+#define COUNTS_LEN 18
+#define INTERVAL_LEN 16
+#define CGROUP_LEN 16
+#define COMM_LEN 16
+#define PID_LEN 7
+#define CPUS_LEN 4
+
+static int aggr_header_lens[] = {
+ [AGGR_CORE] = 18,
+ [AGGR_DIE] = 12,
+ [AGGR_SOCKET] = 6,
+ [AGGR_NODE] = 6,
+ [AGGR_NONE] = 6,
+ [AGGR_THREAD] = 16,
+ [AGGR_GLOBAL] = 0,
+};
+
+static const char *aggr_header_csv[] = {
+ [AGGR_CORE] = "core,cpus,",
+ [AGGR_DIE] = "die,cpus,",
+ [AGGR_SOCKET] = "socket,cpus,",
+ [AGGR_NONE] = "cpu,",
+ [AGGR_THREAD] = "comm-pid,",
+ [AGGR_NODE] = "node,",
+ [AGGR_GLOBAL] = ""
+};
+
+static const char *aggr_header_std[] = {
+ [AGGR_CORE] = "core",
+ [AGGR_DIE] = "die",
+ [AGGR_SOCKET] = "socket",
+ [AGGR_NONE] = "cpu",
+ [AGGR_THREAD] = "comm-pid",
+ [AGGR_NODE] = "node",
+ [AGGR_GLOBAL] = ""
+};
+
+static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena)
{
+ if (run != ena)
+ fprintf(config->output, " (%.2f%%)", 100.0 * run / ena);
+}
+static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena)
+{
double enabled_percent = 100;
if (run != ena)
enabled_percent = 100 * run / ena;
- if (config->json_output)
- fprintf(config->output,
- "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ",
- run, enabled_percent);
- else if (config->csv_output)
- fprintf(config->output,
- "%s%" PRIu64 "%s%.2f", config->csv_sep,
- run, config->csv_sep, enabled_percent);
- else if (run != ena)
- fprintf(config->output, " (%.2f%%)", 100.0 * run / ena);
+ fprintf(config->output, "%s%" PRIu64 "%s%.2f",
+ config->csv_sep, run, config->csv_sep, enabled_percent);
+}
+
+static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena)
+{
+ double enabled_percent = 100;
+
+ if (run != ena)
+ enabled_percent = 100 * run / ena;
+ fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ",
+ run, enabled_percent);
+}
+
+static void print_running(struct perf_stat_config *config,
+ u64 run, u64 ena, bool before_metric)
+{
+ if (config->json_output) {
+ if (before_metric)
+ print_running_json(config, run, ena);
+ } else if (config->csv_output) {
+ if (before_metric)
+ print_running_csv(config, run, ena);
+ } else {
+ if (!before_metric)
+ print_running_std(config, run, ena);
+ }
+}
+
+static void print_noise_pct_std(struct perf_stat_config *config,
+ double pct)
+{
+ if (pct)
+ fprintf(config->output, " ( +-%6.2f%% )", pct);
+}
+
+static void print_noise_pct_csv(struct perf_stat_config *config,
+ double pct)
+{
+ fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
+}
+
+static void print_noise_pct_json(struct perf_stat_config *config,
+ double pct)
+{
+ fprintf(config->output, "\"variance\" : %.2f, ", pct);
}
static void print_noise_pct(struct perf_stat_config *config,
- double total, double avg)
+ double total, double avg, bool before_metric)
{
double pct = rel_stddev_stats(total, avg);
- if (config->json_output)
- fprintf(config->output, "\"variance\" : %.2f, ", pct);
- else if (config->csv_output)
- fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
- else if (pct)
- fprintf(config->output, " ( +-%6.2f%% )", pct);
+ if (config->json_output) {
+ if (before_metric)
+ print_noise_pct_json(config, pct);
+ } else if (config->csv_output) {
+ if (before_metric)
+ print_noise_pct_csv(config, pct);
+ } else {
+ if (!before_metric)
+ print_noise_pct_std(config, pct);
+ }
}
static void print_noise(struct perf_stat_config *config,
- struct evsel *evsel, double avg)
+ struct evsel *evsel, double avg, bool before_metric)
{
struct perf_stat_evsel *ps;
@@ -67,139 +150,166 @@ static void print_noise(struct perf_stat_config *config,
return;
ps = evsel->stats;
- print_noise_pct(config, stddev_stats(&ps->res_stats), avg);
+ print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric);
+}
+
+static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name)
+{
+ fprintf(config->output, " %-*s", CGROUP_LEN, cgrp_name);
+}
+
+static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name)
+{
+ fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+}
+
+static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name)
+{
+ fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
}
-static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
+static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp)
{
- if (nr_cgroups) {
- const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name : "";
+ if (nr_cgroups || config->cgroup_list) {
+ const char *cgrp_name = cgrp ? cgrp->name : "";
if (config->json_output)
- fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
+ print_cgroup_json(config, cgrp_name);
+ else if (config->csv_output)
+ print_cgroup_csv(config, cgrp_name);
else
- fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+ print_cgroup_std(config, cgrp_name);
}
}
-
-static void aggr_printout(struct perf_stat_config *config,
- struct evsel *evsel, struct aggr_cpu_id id, int nr)
+static void print_aggr_id_std(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int nr)
{
+ FILE *output = config->output;
+ int idx = config->aggr_mode;
+ char buf[128];
+
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core);
+ break;
+ case AGGR_DIE:
+ snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
+ break;
+ case AGGR_SOCKET:
+ snprintf(buf, sizeof(buf), "S%d", id.socket);
+ break;
+ case AGGR_NODE:
+ snprintf(buf, sizeof(buf), "N%d", id.node);
+ break;
+ case AGGR_NONE:
+ if (evsel->percore && !config->percore_show_thread) {
+ snprintf(buf, sizeof(buf), "S%d-D%d-C%d ",
+ id.socket, id.die, id.core);
+ fprintf(output, "%-*s ",
+ aggr_header_lens[AGGR_CORE], buf);
+ } else if (id.cpu.cpu > -1) {
+ fprintf(output, "CPU%-*d ",
+ aggr_header_lens[AGGR_NONE] - 3, id.cpu.cpu);
+ }
+ return;
+ case AGGR_THREAD:
+ fprintf(output, "%*s-%-*d ",
+ COMM_LEN, perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ PID_LEN, perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+ return;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ default:
+ return;
+ }
+ fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, nr);
+}
- if (config->json_output && !config->interval)
- fprintf(config->output, "{");
+static void print_aggr_id_csv(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int nr)
+{
+ FILE *output = config->output;
+ const char *sep = config->csv_sep;
switch (config->aggr_mode) {
case AGGR_CORE:
- if (config->json_output) {
- fprintf(config->output,
- "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
- id.socket,
- id.die,
- id.core,
- nr);
- } else {
- fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
- id.socket,
- id.die,
- config->csv_output ? 0 : -8,
- id.core,
- config->csv_sep,
- config->csv_output ? 0 : 4,
- nr,
- config->csv_sep);
- }
+ fprintf(output, "S%d-D%d-C%d%s%d%s",
+ id.socket, id.die, id.core, sep, nr, sep);
break;
case AGGR_DIE:
- if (config->json_output) {
- fprintf(config->output,
- "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
- id.socket,
- id.die,
- nr);
- } else {
- fprintf(config->output, "S%d-D%*d%s%*d%s",
- id.socket,
- config->csv_output ? 0 : -8,
- id.die,
- config->csv_sep,
- config->csv_output ? 0 : 4,
- nr,
- config->csv_sep);
- }
+ fprintf(output, "S%d-D%d%s%d%s",
+ id.socket, id.die, sep, nr, sep);
break;
case AGGR_SOCKET:
- if (config->json_output) {
- fprintf(config->output,
- "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
- id.socket,
- nr);
- } else {
- fprintf(config->output, "S%*d%s%*d%s",
- config->csv_output ? 0 : -5,
- id.socket,
- config->csv_sep,
- config->csv_output ? 0 : 4,
- nr,
- config->csv_sep);
- }
+ fprintf(output, "S%d%s%d%s",
+ id.socket, sep, nr, sep);
break;
case AGGR_NODE:
- if (config->json_output) {
- fprintf(config->output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
- id.node,
- nr);
- } else {
- fprintf(config->output, "N%*d%s%*d%s",
- config->csv_output ? 0 : -5,
- id.node,
- config->csv_sep,
- config->csv_output ? 0 : 4,
- nr,
- config->csv_sep);
- }
+ fprintf(output, "N%d%s%d%s",
+ id.node, sep, nr, sep);
break;
case AGGR_NONE:
- if (config->json_output) {
- if (evsel->percore && !config->percore_show_thread) {
- fprintf(config->output, "\"core\" : \"S%d-D%d-C%d\"",
- id.socket,
- id.die,
- id.core);
- } else if (id.cpu.cpu > -1) {
- fprintf(config->output, "\"cpu\" : \"%d\", ",
- id.cpu.cpu);
- }
- } else {
- if (evsel->percore && !config->percore_show_thread) {
- fprintf(config->output, "S%d-D%d-C%*d%s",
- id.socket,
- id.die,
- config->csv_output ? 0 : -3,
- id.core, config->csv_sep);
- } else if (id.cpu.cpu > -1) {
- fprintf(config->output, "CPU%*d%s",
- config->csv_output ? 0 : -7,
- id.cpu.cpu, config->csv_sep);
- }
+ if (evsel->percore && !config->percore_show_thread) {
+ fprintf(output, "S%d-D%d-C%d%s",
+ id.socket, id.die, id.core, sep);
+ } else if (id.cpu.cpu > -1) {
+ fprintf(output, "CPU%d%s",
+ id.cpu.cpu, sep);
}
break;
case AGGR_THREAD:
- if (config->json_output) {
- fprintf(config->output, "\"thread\" : \"%s-%d\", ",
- perf_thread_map__comm(evsel->core.threads, id.thread_idx),
- perf_thread_map__pid(evsel->core.threads, id.thread_idx));
- } else {
- fprintf(config->output, "%*s-%*d%s",
- config->csv_output ? 0 : 16,
- perf_thread_map__comm(evsel->core.threads, id.thread_idx),
- config->csv_output ? 0 : -8,
- perf_thread_map__pid(evsel->core.threads, id.thread_idx),
- config->csv_sep);
+ fprintf(output, "%s-%d%s",
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx),
+ sep);
+ break;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ default:
+ break;
+ }
+}
+
+static void print_aggr_id_json(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int nr)
+{
+ FILE *output = config->output;
+
+ switch (config->aggr_mode) {
+ case AGGR_CORE:
+ fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"cpu-count\" : %d, ",
+ id.socket, id.die, id.core, nr);
+ break;
+ case AGGR_DIE:
+ fprintf(output, "\"die\" : \"S%d-D%d\", \"cpu-count\" : %d, ",
+ id.socket, id.die, nr);
+ break;
+ case AGGR_SOCKET:
+ fprintf(output, "\"socket\" : \"S%d\", \"cpu-count\" : %d, ",
+ id.socket, nr);
+ break;
+ case AGGR_NODE:
+ fprintf(output, "\"node\" : \"N%d\", \"cpu-count\" : %d, ",
+ id.node, nr);
+ break;
+ case AGGR_NONE:
+ if (evsel->percore && !config->percore_show_thread) {
+ fprintf(output, "\"core\" : \"S%d-D%d-C%d\"",
+ id.socket, id.die, id.core);
+ } else if (id.cpu.cpu > -1) {
+ fprintf(output, "\"cpu\" : \"%d\", ",
+ id.cpu.cpu);
}
break;
+ case AGGR_THREAD:
+ fprintf(output, "\"thread\" : \"%s-%d\", ",
+ perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+ perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+ break;
case AGGR_GLOBAL:
case AGGR_UNSET:
case AGGR_MAX:
@@ -208,18 +318,29 @@ static void aggr_printout(struct perf_stat_config *config,
}
}
+static void aggr_printout(struct perf_stat_config *config,
+ struct evsel *evsel, struct aggr_cpu_id id, int nr)
+{
+ if (config->json_output)
+ print_aggr_id_json(config, evsel, id, nr);
+ else if (config->csv_output)
+ print_aggr_id_csv(config, evsel, id, nr);
+ else
+ print_aggr_id_std(config, evsel, id, nr);
+}
+
struct outstate {
FILE *fh;
bool newline;
+ bool first;
const char *prefix;
int nfields;
int nr;
struct aggr_cpu_id id;
struct evsel *evsel;
+ struct cgroup *cgrp;
};
-#define METRIC_LEN 35
-
static void new_line_std(struct perf_stat_config *config __maybe_unused,
void *ctx)
{
@@ -368,6 +489,7 @@ static void print_metric_only(struct perf_stat_config *config,
color_snprintf(str, sizeof(str), color ?: "", fmt, val);
fprintf(out, "%*s ", mlen, str);
+ os->first = false;
}
static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused,
@@ -389,6 +511,7 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused
ends++;
*ends = 0;
fprintf(out, "%s%s", vals, config->csv_sep);
+ os->first = false;
}
static void print_metric_only_json(struct perf_stat_config *config __maybe_unused,
@@ -409,7 +532,10 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
while (isdigit(*ends) || *ends == '.')
ends++;
*ends = 0;
- fprintf(out, "{\"metric-value\" : \"%s\"}", vals);
+ if (!unit[0] || !vals[0])
+ return;
+ fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals);
+ os->first = false;
}
static void new_line_metric(struct perf_stat_config *config __maybe_unused,
@@ -430,84 +556,100 @@ static void print_metric_header(struct perf_stat_config *config,
os->evsel->priv != os->evsel->evlist->selected->priv)
return;
- if (!valid_only_metric(unit) && !config->json_output)
+ if (os->evsel->cgrp != os->cgrp)
+ return;
+
+ if (!valid_only_metric(unit))
return;
unit = fixunit(tbuf, os->evsel, unit);
if (config->json_output)
- fprintf(os->fh, "\"unit\" : \"%s\"", unit);
+ return;
else if (config->csv_output)
fprintf(os->fh, "%s%s", unit, config->csv_sep);
else
fprintf(os->fh, "%*s ", config->metric_only_len, unit);
}
-static int first_shadow_map_idx(struct perf_stat_config *config,
- struct evsel *evsel, const struct aggr_cpu_id *id)
+static void print_counter_value_std(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
{
- struct perf_cpu_map *cpus = evsel__cpus(evsel);
- struct perf_cpu cpu;
- int idx;
-
- if (config->aggr_mode == AGGR_NONE)
- return perf_cpu_map__idx(cpus, id->cpu);
+ FILE *output = config->output;
+ double sc = evsel->scale;
+ const char *fmt;
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
- if (config->aggr_mode == AGGR_THREAD)
- return id->thread_idx;
+ if (config->big_num)
+ fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
+ else
+ fmt = floor(sc) != sc ? "%*.2f " : "%*.0f ";
- if (!config->aggr_get_id)
- return 0;
+ if (ok)
+ fprintf(output, fmt, COUNTS_LEN, avg);
+ else
+ fprintf(output, "%*s ", COUNTS_LEN, bad_count);
- perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
- struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
+ if (evsel->unit)
+ fprintf(output, "%-*s ", config->unit_width, evsel->unit);
- if (aggr_cpu_id__equal(&cpu_id, id))
- return idx;
- }
- return 0;
+ fprintf(output, "%-*s", EVNAME_LEN, evsel__name(evsel));
}
-static void abs_printout(struct perf_stat_config *config,
- struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
+static void print_counter_value_csv(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
{
FILE *output = config->output;
double sc = evsel->scale;
- const char *fmt;
+ const char *sep = config->csv_sep;
+ const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
- if (config->csv_output) {
- fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
- } else {
- if (config->big_num)
- fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
- else
- fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
- }
+ if (ok)
+ fprintf(output, fmt, avg, sep);
+ else
+ fprintf(output, "%s%s", bad_count, sep);
- aggr_printout(config, evsel, id, nr);
+ if (evsel->unit)
+ fprintf(output, "%s%s", evsel->unit, sep);
- if (config->json_output)
+ fprintf(output, "%s", evsel__name(evsel));
+}
+
+static void print_counter_value_json(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
+{
+ FILE *output = config->output;
+ const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
+
+ if (ok)
fprintf(output, "\"counter-value\" : \"%f\", ", avg);
else
- fprintf(output, fmt, avg, config->csv_sep);
+ fprintf(output, "\"counter-value\" : \"%s\", ", bad_count);
- if (config->json_output) {
- if (evsel->unit) {
- fprintf(output, "\"unit\" : \"%s\", ",
- evsel->unit);
- }
- } else {
- if (evsel->unit)
- fprintf(output, "%-*s%s",
- config->csv_output ? 0 : config->unit_width,
- evsel->unit, config->csv_sep);
- }
+ if (evsel->unit)
+ fprintf(output, "\"unit\" : \"%s\", ", evsel->unit);
+ fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel));
+}
+
+static void print_counter_value(struct perf_stat_config *config,
+ struct evsel *evsel, double avg, bool ok)
+{
if (config->json_output)
- fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel));
+ print_counter_value_json(config, evsel, avg, ok);
+ else if (config->csv_output)
+ print_counter_value_csv(config, evsel, avg, ok);
else
- fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel));
+ print_counter_value_std(config, evsel, avg, ok);
+}
- print_cgroup(config, evsel);
+static void abs_printout(struct perf_stat_config *config,
+ struct aggr_cpu_id id, int nr,
+ struct evsel *evsel, double avg, bool ok)
+{
+ aggr_printout(config, evsel, id, nr);
+ print_counter_value(config, evsel, avg, ok);
+ print_cgroup(config, evsel->cgrp);
}
static bool is_mixed_hw_group(struct evsel *counter)
@@ -534,21 +676,14 @@ static bool is_mixed_hw_group(struct evsel *counter)
return false;
}
-static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
- struct evsel *counter, double uval,
- char *prefix, u64 run, u64 ena, double noise,
- struct runtime_stat *st)
+static void printout(struct perf_stat_config *config, struct outstate *os,
+ double uval, u64 run, u64 ena, double noise, int map_idx)
{
struct perf_stat_output_ctx out;
- struct outstate os = {
- .fh = config->output,
- .prefix = prefix ? prefix : "",
- .id = id,
- .nr = nr,
- .evsel = counter,
- };
print_metric_t pm;
new_line_t nl;
+ bool ok = true;
+ struct evsel *counter = os->evsel;
if (config->csv_output) {
static const int aggr_fields[AGGR_MAX] = {
@@ -564,7 +699,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
nl = config->metric_only ? new_line_metric : new_line_csv;
- os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0);
+ os->nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0);
} else if (config->json_output) {
pm = config->metric_only ? print_metric_only_json : print_metric_json;
nl = config->metric_only ? new_line_metric : new_line_json;
@@ -573,27 +708,13 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
nl = config->metric_only ? new_line_metric : new_line_std;
}
- if (!config->no_csv_summary && config->csv_output &&
- config->summary && !config->interval) {
- fprintf(config->output, "%16s%s", "summary", config->csv_sep);
- }
-
if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
if (config->metric_only) {
- pm(config, &os, NULL, "", "", 0);
+ pm(config, os, NULL, "", "", 0);
return;
}
- aggr_printout(config, counter, id, nr);
- if (config->json_output) {
- fprintf(config->output, "\"counter-value\" : \"%s\", ",
- counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED);
- } else {
- fprintf(config->output, "%*s%s",
- config->csv_output ? 0 : 18,
- counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
- config->csv_sep);
- }
+ ok = false;
if (counter->supported) {
if (!evlist__has_hybrid(counter->evlist)) {
@@ -602,86 +723,30 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
config->print_mixed_hw_group_error = 1;
}
}
-
- if (config->json_output) {
- fprintf(config->output, "\"unit\" : \"%s\", ", counter->unit);
- } else {
- fprintf(config->output, "%-*s%s",
- config->csv_output ? 0 : config->unit_width,
- counter->unit, config->csv_sep);
- }
-
- if (config->json_output) {
- fprintf(config->output, "\"event\" : \"%s\", ",
- evsel__name(counter));
- } else {
- fprintf(config->output, "%*s",
- config->csv_output ? 0 : -25, evsel__name(counter));
- }
-
- print_cgroup(config, counter);
-
- if (!config->csv_output && !config->json_output)
- pm(config, &os, NULL, NULL, "", 0);
- print_noise(config, counter, noise);
- print_running(config, run, ena);
- if (config->csv_output)
- pm(config, &os, NULL, NULL, "", 0);
- else if (config->json_output)
- pm(config, &os, NULL, NULL, "", 0);
- return;
}
- if (!config->metric_only)
- abs_printout(config, id, nr, counter, uval);
-
out.print_metric = pm;
out.new_line = nl;
- out.ctx = &os;
+ out.ctx = os;
out.force_header = false;
- if (config->csv_output && !config->metric_only) {
- print_noise(config, counter, noise);
- print_running(config, run, ena);
- } else if (config->json_output && !config->metric_only) {
- print_noise(config, counter, noise);
- print_running(config, run, ena);
- }
+ if (!config->metric_only) {
+ abs_printout(config, os->id, os->nr, counter, uval, ok);
- perf_stat__print_shadow_stats(config, counter, uval,
- first_shadow_map_idx(config, counter, &id),
- &out, &config->metric_events, st);
- if (!config->csv_output && !config->metric_only && !config->json_output) {
- print_noise(config, counter, noise);
- print_running(config, run, ena);
+ print_noise(config, counter, noise, /*before_metric=*/true);
+ print_running(config, run, ena, /*before_metric=*/true);
}
-}
-static void aggr_update_shadow(struct perf_stat_config *config,
- struct evlist *evlist)
-{
- int idx, s;
- struct perf_cpu cpu;
- struct aggr_cpu_id s2, id;
- u64 val;
- struct evsel *counter;
- struct perf_cpu_map *cpus;
+ if (ok) {
+ perf_stat__print_shadow_stats(config, counter, uval, map_idx,
+ &out, &config->metric_events, &rt_stat);
+ } else {
+ pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
+ }
- for (s = 0; s < config->aggr_map->nr; s++) {
- id = config->aggr_map->map[s];
- evlist__for_each_entry(evlist, counter) {
- cpus = evsel__cpus(counter);
- val = 0;
- perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
- s2 = config->aggr_get_id(config, cpu);
- if (!aggr_cpu_id__equal(&s2, &id))
- continue;
- val += perf_counts(counter->counts, idx, 0)->val;
- }
- perf_stat__update_shadow_stats(counter, val,
- first_shadow_map_idx(config, counter, &id),
- &rt_stat);
- }
+ if (!config->metric_only) {
+ print_noise(config, counter, noise, /*before_metric=*/false);
+ print_running(config, run, ena, /*before_metric=*/false);
}
}
@@ -704,7 +769,7 @@ static void uniquify_event_name(struct evsel *counter)
counter->name = new_name;
}
} else {
- if (perf_pmu__has_hybrid()) {
+ if (evsel__is_hybrid(counter)) {
ret = asprintf(&new_name, "%s/%s/",
counter->pmu_name, counter->name);
} else {
@@ -721,366 +786,181 @@ static void uniquify_event_name(struct evsel *counter)
counter->uniquified_name = true;
}
-static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter,
- void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
- bool first),
- void *data)
+static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
{
- struct evlist *evlist = counter->evlist;
- struct evsel *alias;
-
- alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
- list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
- /* Merge events with the same name, etc. but on different PMUs. */
- if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
- alias->scale == counter->scale &&
- alias->cgrp == counter->cgrp &&
- !strcmp(alias->unit, counter->unit) &&
- evsel__is_clock(alias) == evsel__is_clock(counter) &&
- strcmp(alias->pmu_name, counter->pmu_name)) {
- alias->merged_stat = true;
- cb(config, alias, data, false);
- }
- }
+ return evsel__is_hybrid(evsel) && !config->hybrid_merge;
}
-static bool is_uncore(struct evsel *evsel)
+static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
{
- struct perf_pmu *pmu = evsel__find_pmu(evsel);
-
- return pmu && pmu->is_uncore;
+ if (config->no_merge || hybrid_uniquify(counter, config))
+ uniquify_event_name(counter);
}
-static bool hybrid_uniquify(struct evsel *evsel)
+static void print_counter_aggrdata(struct perf_stat_config *config,
+ struct evsel *counter, int s,
+ struct outstate *os)
{
- return perf_pmu__has_hybrid() && !is_uncore(evsel);
-}
+ FILE *output = config->output;
+ u64 ena, run, val;
+ double uval;
+ struct perf_stat_evsel *ps = counter->stats;
+ struct perf_stat_aggr *aggr = &ps->aggr[s];
+ struct aggr_cpu_id id = config->aggr_map->map[s];
+ double avg = aggr->counts.val;
+ bool metric_only = config->metric_only;
-static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
- bool check)
-{
- if (hybrid_uniquify(counter)) {
- if (check)
- return config && config->hybrid_merge;
- else
- return config && !config->hybrid_merge;
+ os->id = id;
+ os->nr = aggr->nr;
+ os->evsel = counter;
+
+ if (counter->supported && aggr->nr == 0)
+ return;
+
+ uniquify_counter(config, counter);
+
+ val = aggr->counts.val;
+ ena = aggr->counts.ena;
+ run = aggr->counts.run;
+
+ if (!metric_only) {
+ if (config->json_output)
+ fputc('{', output);
+ if (os->prefix)
+ fprintf(output, "%s", os->prefix);
+ else if (config->summary && config->csv_output &&
+ !config->no_csv_summary && !config->interval)
+ fprintf(output, "%s%s", "summary", config->csv_sep);
}
- return false;
-}
+ uval = val * counter->scale;
-static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
- void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
- bool first),
- void *data)
-{
- if (counter->merged_stat)
- return false;
- cb(config, counter, data, true);
- if (config->no_merge || hybrid_merge(counter, config, false))
- uniquify_event_name(counter);
- else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
- collect_all_aliases(config, counter, cb, data);
- return true;
+ printout(config, os, uval, run, ena, avg, s);
+
+ if (!metric_only)
+ fputc('\n', output);
}
-struct aggr_data {
- u64 ena, run, val;
+static void print_metric_begin(struct perf_stat_config *config,
+ struct evlist *evlist,
+ struct outstate *os, int aggr_idx)
+{
+ struct perf_stat_aggr *aggr;
struct aggr_cpu_id id;
- int nr;
- int cpu_map_idx;
-};
+ struct evsel *evsel;
-static void aggr_cb(struct perf_stat_config *config,
- struct evsel *counter, void *data, bool first)
-{
- struct aggr_data *ad = data;
- int idx;
- struct perf_cpu cpu;
- struct perf_cpu_map *cpus;
- struct aggr_cpu_id s2;
+ os->first = true;
+ if (!config->metric_only)
+ return;
- cpus = evsel__cpus(counter);
- perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
- struct perf_counts_values *counts;
+ if (config->json_output)
+ fputc('{', config->output);
+ if (os->prefix)
+ fprintf(config->output, "%s", os->prefix);
- s2 = config->aggr_get_id(config, cpu);
- if (!aggr_cpu_id__equal(&s2, &ad->id))
- continue;
- if (first)
- ad->nr++;
- counts = perf_counts(counter->counts, idx, 0);
- /*
- * When any result is bad, make them all to give
- * consistent output in interval mode.
- */
- if (counts->ena == 0 || counts->run == 0 ||
- counter->counts->scaled == -1) {
- ad->ena = 0;
- ad->run = 0;
- break;
- }
- ad->val += counts->val;
- ad->ena += counts->ena;
- ad->run += counts->run;
- }
+ evsel = evlist__first(evlist);
+ id = config->aggr_map->map[aggr_idx];
+ aggr = &evsel->stats->aggr[aggr_idx];
+ aggr_printout(config, evsel, id, aggr->nr);
+
+ print_cgroup(config, os->cgrp ? : evsel->cgrp);
}
-static void print_counter_aggrdata(struct perf_stat_config *config,
- struct evsel *counter, int s,
- char *prefix, bool metric_only,
- bool *first, struct perf_cpu cpu)
+static void print_metric_end(struct perf_stat_config *config, struct outstate *os)
{
- struct aggr_data ad;
FILE *output = config->output;
- u64 ena, run, val;
- int nr;
- struct aggr_cpu_id id;
- double uval;
- ad.id = id = config->aggr_map->map[s];
- ad.val = ad.ena = ad.run = 0;
- ad.nr = 0;
- if (!collect_data(config, counter, aggr_cb, &ad))
- return;
-
- if (perf_pmu__has_hybrid() && ad.ena == 0)
+ if (!config->metric_only)
return;
- nr = ad.nr;
- ena = ad.ena;
- run = ad.run;
- val = ad.val;
- if (*first && metric_only) {
- *first = false;
- aggr_printout(config, counter, id, nr);
+ if (config->json_output) {
+ if (os->first)
+ fputs("\"metric-value\" : \"none\"", output);
+ fputc('}', output);
}
- if (prefix && !metric_only)
- fprintf(output, "%s", prefix);
-
- uval = val * counter->scale;
- if (cpu.cpu != -1)
- id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
-
- printout(config, id, nr, counter, uval,
- prefix, run, ena, 1.0, &rt_stat);
- if (!metric_only)
- fputc('\n', output);
+ fputc('\n', output);
}
static void print_aggr(struct perf_stat_config *config,
struct evlist *evlist,
- char *prefix)
+ struct outstate *os)
{
- bool metric_only = config->metric_only;
- FILE *output = config->output;
struct evsel *counter;
int s;
- bool first;
if (!config->aggr_map || !config->aggr_get_id)
return;
- aggr_update_shadow(config, evlist);
-
/*
* With metric_only everything is on a single line.
* Without each counter has its own line.
*/
for (s = 0; s < config->aggr_map->nr; s++) {
- if (prefix && metric_only)
- fprintf(output, "%s", prefix);
+ print_metric_begin(config, evlist, os, s);
- first = true;
evlist__for_each_entry(evlist, counter) {
- print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, (struct perf_cpu){ .cpu = -1 });
- }
- if (metric_only)
- fputc('\n', output);
- }
-}
-
-static int cmp_val(const void *a, const void *b)
-{
- return ((struct perf_aggr_thread_value *)b)->val -
- ((struct perf_aggr_thread_value *)a)->val;
-}
-
-static struct perf_aggr_thread_value *sort_aggr_thread(
- struct evsel *counter,
- int *ret,
- struct target *_target)
-{
- int nthreads = perf_thread_map__nr(counter->core.threads);
- int i = 0;
- double uval;
- struct perf_aggr_thread_value *buf;
-
- buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
- if (!buf)
- return NULL;
-
- for (int thread = 0; thread < nthreads; thread++) {
- int idx;
- u64 ena = 0, run = 0, val = 0;
-
- perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) {
- struct perf_counts_values *counts =
- perf_counts(counter->counts, idx, thread);
+ if (counter->merged_stat)
+ continue;
- val += counts->val;
- ena += counts->ena;
- run += counts->run;
+ print_counter_aggrdata(config, counter, s, os);
}
-
- uval = val * counter->scale;
-
- /*
- * Skip value 0 when enabling --per-thread globally,
- * otherwise too many 0 output.
- */
- if (uval == 0.0 && target__has_per_thread(_target))
- continue;
-
- buf[i].counter = counter;
- buf[i].id = aggr_cpu_id__empty();
- buf[i].id.thread_idx = thread;
- buf[i].uval = uval;
- buf[i].val = val;
- buf[i].run = run;
- buf[i].ena = ena;
- i++;
+ print_metric_end(config, os);
}
-
- qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
-
- if (ret)
- *ret = i;
-
- return buf;
}
-static void print_aggr_thread(struct perf_stat_config *config,
- struct target *_target,
- struct evsel *counter, char *prefix)
+static void print_aggr_cgroup(struct perf_stat_config *config,
+ struct evlist *evlist,
+ struct outstate *os)
{
- FILE *output = config->output;
- int thread, sorted_threads;
- struct aggr_cpu_id id;
- struct perf_aggr_thread_value *buf;
+ struct evsel *counter, *evsel;
+ int s;
- buf = sort_aggr_thread(counter, &sorted_threads, _target);
- if (!buf) {
- perror("cannot sort aggr thread");
+ if (!config->aggr_map || !config->aggr_get_id)
return;
- }
- for (thread = 0; thread < sorted_threads; thread++) {
- if (prefix)
- fprintf(output, "%s", prefix);
-
- id = buf[thread].id;
- printout(config, id, 0, buf[thread].counter, buf[thread].uval,
- prefix, buf[thread].run, buf[thread].ena, 1.0,
- &rt_stat);
- fputc('\n', output);
- }
-
- free(buf);
-}
-
-struct caggr_data {
- double avg, avg_enabled, avg_running;
-};
-
-static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused,
- struct evsel *counter, void *data,
- bool first __maybe_unused)
-{
- struct caggr_data *cd = data;
- struct perf_counts_values *aggr = &counter->counts->aggr;
-
- cd->avg += aggr->val;
- cd->avg_enabled += aggr->ena;
- cd->avg_running += aggr->run;
-}
-
-/*
- * Print out the results of a single counter:
- * aggregated counts in system-wide mode
- */
-static void print_counter_aggr(struct perf_stat_config *config,
- struct evsel *counter, char *prefix)
-{
- bool metric_only = config->metric_only;
- FILE *output = config->output;
- double uval;
- struct caggr_data cd = { .avg = 0.0 };
+ evlist__for_each_entry(evlist, evsel) {
+ if (os->cgrp == evsel->cgrp)
+ continue;
- if (!collect_data(config, counter, counter_aggr_cb, &cd))
- return;
+ os->cgrp = evsel->cgrp;
- if (prefix && !metric_only)
- fprintf(output, "%s", prefix);
+ for (s = 0; s < config->aggr_map->nr; s++) {
+ print_metric_begin(config, evlist, os, s);
- uval = cd.avg * counter->scale;
- printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running,
- cd.avg_enabled, cd.avg, &rt_stat);
- if (!metric_only)
- fprintf(output, "\n");
-}
+ evlist__for_each_entry(evlist, counter) {
+ if (counter->merged_stat)
+ continue;
-static void counter_cb(struct perf_stat_config *config __maybe_unused,
- struct evsel *counter, void *data,
- bool first __maybe_unused)
-{
- struct aggr_data *ad = data;
+ if (counter->cgrp != os->cgrp)
+ continue;
- ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val;
- ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena;
- ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run;
+ print_counter_aggrdata(config, counter, s, os);
+ }
+ print_metric_end(config, os);
+ }
+ }
}
-/*
- * Print out the results of a single counter:
- * does not use aggregated count in system-wide
- */
static void print_counter(struct perf_stat_config *config,
- struct evsel *counter, char *prefix)
+ struct evsel *counter, struct outstate *os)
{
- FILE *output = config->output;
- u64 ena, run, val;
- double uval;
- int idx;
- struct perf_cpu cpu;
- struct aggr_cpu_id id;
-
- perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
- struct aggr_data ad = { .cpu_map_idx = idx };
-
- if (!collect_data(config, counter, counter_cb, &ad))
- return;
- val = ad.val;
- ena = ad.ena;
- run = ad.run;
+ int s;
- if (prefix)
- fprintf(output, "%s", prefix);
+ /* AGGR_THREAD doesn't have config->aggr_get_id */
+ if (!config->aggr_map)
+ return;
- uval = val * counter->scale;
- id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
- printout(config, id, 0, counter, uval, prefix,
- run, ena, 1.0, &rt_stat);
+ if (counter->merged_stat)
+ return;
- fputc('\n', output);
+ for (s = 0; s < config->aggr_map->nr; s++) {
+ print_counter_aggrdata(config, counter, s, os);
}
}
static void print_no_aggr_metric(struct perf_stat_config *config,
struct evlist *evlist,
- char *prefix)
+ struct outstate *os)
{
int all_idx;
struct perf_cpu cpu;
@@ -1092,214 +972,241 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
evlist__for_each_entry(evlist, counter) {
u64 ena, run, val;
double uval;
- struct aggr_cpu_id id;
+ struct perf_stat_evsel *ps = counter->stats;
int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
if (counter_idx < 0)
continue;
- id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+ os->evsel = counter;
+ os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
if (first) {
- if (prefix)
- fputs(prefix, config->output);
- aggr_printout(config, counter, id, 0);
+ print_metric_begin(config, evlist, os, counter_idx);
first = false;
}
- val = perf_counts(counter->counts, counter_idx, 0)->val;
- ena = perf_counts(counter->counts, counter_idx, 0)->ena;
- run = perf_counts(counter->counts, counter_idx, 0)->run;
+ val = ps->aggr[counter_idx].counts.val;
+ ena = ps->aggr[counter_idx].counts.ena;
+ run = ps->aggr[counter_idx].counts.run;
uval = val * counter->scale;
- printout(config, id, 0, counter, uval, prefix,
- run, ena, 1.0, &rt_stat);
+ printout(config, os, uval, run, ena, 1.0, counter_idx);
}
if (!first)
- fputc('\n', config->output);
+ print_metric_end(config, os);
}
}
-static int aggr_header_lens[] = {
- [AGGR_CORE] = 24,
- [AGGR_DIE] = 18,
- [AGGR_SOCKET] = 12,
- [AGGR_NONE] = 6,
- [AGGR_THREAD] = 24,
- [AGGR_NODE] = 6,
- [AGGR_GLOBAL] = 0,
-};
+static void print_metric_headers_std(struct perf_stat_config *config,
+ bool no_indent)
+{
+ fputc(' ', config->output);
-static const char *aggr_header_csv[] = {
- [AGGR_CORE] = "core,cpus,",
- [AGGR_DIE] = "die,cpus",
- [AGGR_SOCKET] = "socket,cpus",
- [AGGR_NONE] = "cpu,",
- [AGGR_THREAD] = "comm-pid,",
- [AGGR_NODE] = "node,",
- [AGGR_GLOBAL] = ""
-};
+ if (!no_indent) {
+ int len = aggr_header_lens[config->aggr_mode];
+
+ if (nr_cgroups || config->cgroup_list)
+ len += CGROUP_LEN + 1;
+
+ fprintf(config->output, "%*s", len, "");
+ }
+}
+
+static void print_metric_headers_csv(struct perf_stat_config *config,
+ bool no_indent __maybe_unused)
+{
+ if (config->interval)
+ fputs("time,", config->output);
+ if (!config->iostat_run)
+ fputs(aggr_header_csv[config->aggr_mode], config->output);
+}
+
+static void print_metric_headers_json(struct perf_stat_config *config __maybe_unused,
+ bool no_indent __maybe_unused)
+{
+}
static void print_metric_headers(struct perf_stat_config *config,
- struct evlist *evlist,
- const char *prefix, bool no_indent)
+ struct evlist *evlist, bool no_indent)
{
- struct perf_stat_output_ctx out;
struct evsel *counter;
struct outstate os = {
.fh = config->output
};
- bool first = true;
-
- if (config->json_output && !config->interval)
- fprintf(config->output, "{");
+ struct perf_stat_output_ctx out = {
+ .ctx = &os,
+ .print_metric = print_metric_header,
+ .new_line = new_line_metric,
+ .force_header = true,
+ };
- if (prefix && !config->json_output)
- fprintf(config->output, "%s", prefix);
+ if (config->json_output)
+ print_metric_headers_json(config, no_indent);
+ else if (config->csv_output)
+ print_metric_headers_csv(config, no_indent);
+ else
+ print_metric_headers_std(config, no_indent);
- if (!config->csv_output && !no_indent)
- fprintf(config->output, "%*s",
- aggr_header_lens[config->aggr_mode], "");
- if (config->csv_output) {
- if (config->interval)
- fputs("time,", config->output);
- if (!config->iostat_run)
- fputs(aggr_header_csv[config->aggr_mode], config->output);
- }
if (config->iostat_run)
iostat_print_header_prefix(config);
+ if (config->cgroup_list)
+ os.cgrp = evlist__first(evlist)->cgrp;
+
/* Print metrics headers only */
evlist__for_each_entry(evlist, counter) {
os.evsel = counter;
- out.ctx = &os;
- out.print_metric = print_metric_header;
- if (!first && config->json_output)
- fprintf(config->output, ", ");
- first = false;
- out.new_line = new_line_metric;
- out.force_header = true;
+
perf_stat__print_shadow_stats(config, counter, 0,
0,
&out,
&config->metric_events,
&rt_stat);
}
+
+ if (!config->json_output)
+ fputc('\n', config->output);
+}
+
+static void prepare_interval(struct perf_stat_config *config,
+ char *prefix, size_t len, struct timespec *ts)
+{
+ if (config->iostat_run)
+ return;
+
if (config->json_output)
- fprintf(config->output, "}");
- fputc('\n', config->output);
+ scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ",
+ (unsigned long) ts->tv_sec, ts->tv_nsec);
+ else if (config->csv_output)
+ scnprintf(prefix, len, "%lu.%09lu%s",
+ (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
+ else
+ scnprintf(prefix, len, "%6lu.%09lu ",
+ (unsigned long) ts->tv_sec, ts->tv_nsec);
}
-static void print_interval(struct perf_stat_config *config,
- struct evlist *evlist,
- char *prefix, struct timespec *ts)
+static void print_header_interval_std(struct perf_stat_config *config,
+ struct target *_target __maybe_unused,
+ struct evlist *evlist,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
{
- bool metric_only = config->metric_only;
- unsigned int unit_width = config->unit_width;
FILE *output = config->output;
- static int num_print_interval;
- if (config->interval_clear)
- puts(CONSOLE_CLEAR);
-
- if (!config->iostat_run && !config->json_output)
- sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec,
- ts->tv_nsec, config->csv_sep);
- if (!config->iostat_run && config->json_output && !config->metric_only)
- sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long)
- ts->tv_sec, ts->tv_nsec);
- if (!config->iostat_run && config->json_output && config->metric_only)
- sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long)
- ts->tv_sec, ts->tv_nsec);
-
- if ((num_print_interval == 0 && !config->csv_output && !config->json_output)
- || config->interval_clear) {
- switch (config->aggr_mode) {
- case AGGR_NODE:
- fprintf(output, "# time node cpus");
- if (!metric_only)
- fprintf(output, " counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_SOCKET:
- fprintf(output, "# time socket cpus");
- if (!metric_only)
- fprintf(output, " counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_DIE:
- fprintf(output, "# time die cpus");
- if (!metric_only)
- fprintf(output, " counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_CORE:
- fprintf(output, "# time core cpus");
- if (!metric_only)
- fprintf(output, " counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_NONE:
- fprintf(output, "# time CPU ");
- if (!metric_only)
- fprintf(output, " counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_THREAD:
- fprintf(output, "# time comm-pid");
- if (!metric_only)
- fprintf(output, " counts %*s events\n", unit_width, "unit");
- break;
- case AGGR_GLOBAL:
- default:
- if (!config->iostat_run) {
- fprintf(output, "# time");
- if (!metric_only)
- fprintf(output, " counts %*s events\n", unit_width, "unit");
- }
- case AGGR_UNSET:
- case AGGR_MAX:
- break;
- }
+ switch (config->aggr_mode) {
+ case AGGR_NODE:
+ case AGGR_SOCKET:
+ case AGGR_DIE:
+ case AGGR_CORE:
+ fprintf(output, "#%*s %-*s cpus",
+ INTERVAL_LEN - 1, "time",
+ aggr_header_lens[config->aggr_mode],
+ aggr_header_std[config->aggr_mode]);
+ break;
+ case AGGR_NONE:
+ fprintf(output, "#%*s %-*s",
+ INTERVAL_LEN - 1, "time",
+ aggr_header_lens[config->aggr_mode],
+ aggr_header_std[config->aggr_mode]);
+ break;
+ case AGGR_THREAD:
+ fprintf(output, "#%*s %*s-%-*s",
+ INTERVAL_LEN - 1, "time",
+ COMM_LEN, "comm", PID_LEN, "pid");
+ break;
+ case AGGR_GLOBAL:
+ default:
+ if (!config->iostat_run)
+ fprintf(output, "#%*s",
+ INTERVAL_LEN - 1, "time");
+ case AGGR_UNSET:
+ case AGGR_MAX:
+ break;
}
- if ((num_print_interval == 0 || config->interval_clear)
- && metric_only && !config->json_output)
- print_metric_headers(config, evlist, " ", true);
- if ((num_print_interval == 0 || config->interval_clear)
- && metric_only && config->json_output) {
- fprintf(output, "{");
- print_metric_headers(config, evlist, " ", true);
- }
- if (++num_print_interval == 25)
- num_print_interval = 0;
+ if (config->metric_only)
+ print_metric_headers(config, evlist, true);
+ else
+ fprintf(output, " %*s %*s events\n",
+ COUNTS_LEN, "counts", config->unit_width, "unit");
+}
+
+static void print_header_std(struct perf_stat_config *config,
+ struct target *_target, struct evlist *evlist,
+ int argc, const char **argv)
+{
+ FILE *output = config->output;
+ int i;
+
+ fprintf(output, "\n");
+ fprintf(output, " Performance counter stats for ");
+ if (_target->bpf_str)
+ fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
+ else if (_target->system_wide)
+ fprintf(output, "\'system wide");
+ else if (_target->cpu_list)
+ fprintf(output, "\'CPU(s) %s", _target->cpu_list);
+ else if (!target__has_task(_target)) {
+ fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+ for (i = 1; argv && (i < argc); i++)
+ fprintf(output, " %s", argv[i]);
+ } else if (_target->pid)
+ fprintf(output, "process id \'%s", _target->pid);
+ else
+ fprintf(output, "thread id \'%s", _target->tid);
+
+ fprintf(output, "\'");
+ if (config->run_count > 1)
+ fprintf(output, " (%d runs)", config->run_count);
+ fprintf(output, ":\n\n");
+
+ if (config->metric_only)
+ print_metric_headers(config, evlist, false);
+}
+
+static void print_header_csv(struct perf_stat_config *config,
+ struct target *_target __maybe_unused,
+ struct evlist *evlist,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ if (config->metric_only)
+ print_metric_headers(config, evlist, true);
+}
+static void print_header_json(struct perf_stat_config *config,
+ struct target *_target __maybe_unused,
+ struct evlist *evlist,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ if (config->metric_only)
+ print_metric_headers(config, evlist, true);
}
static void print_header(struct perf_stat_config *config,
struct target *_target,
+ struct evlist *evlist,
int argc, const char **argv)
{
- FILE *output = config->output;
- int i;
+ static int num_print_iv;
fflush(stdout);
- if (!config->csv_output && !config->json_output) {
- fprintf(output, "\n");
- fprintf(output, " Performance counter stats for ");
- if (_target->bpf_str)
- fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
- else if (_target->system_wide)
- fprintf(output, "\'system wide");
- else if (_target->cpu_list)
- fprintf(output, "\'CPU(s) %s", _target->cpu_list);
- else if (!target__has_task(_target)) {
- fprintf(output, "\'%s", argv ? argv[0] : "pipe");
- for (i = 1; argv && (i < argc); i++)
- fprintf(output, " %s", argv[i]);
- } else if (_target->pid)
- fprintf(output, "process id \'%s", _target->pid);
- else
- fprintf(output, "thread id \'%s", _target->tid);
+ if (config->interval_clear)
+ puts(CONSOLE_CLEAR);
- fprintf(output, "\'");
- if (config->run_count > 1)
- fprintf(output, " (%d runs)", config->run_count);
- fprintf(output, ":\n\n");
+ if (num_print_iv == 0 || config->interval_clear) {
+ if (config->json_output)
+ print_header_json(config, _target, evlist, argc, argv);
+ else if (config->csv_output)
+ print_header_csv(config, _target, evlist, argc, argv);
+ else if (config->interval)
+ print_header_interval_std(config, _target, evlist, argc, argv);
+ else
+ print_header_std(config, _target, evlist, argc, argv);
}
+
+ if (num_print_iv++ == 25)
+ num_print_iv = 0;
}
static int get_precision(double num)
@@ -1348,6 +1255,9 @@ static void print_footer(struct perf_stat_config *config)
double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
FILE *output = config->output;
+ if (config->interval || config->csv_output || config->json_output)
+ return;
+
if (!config->null_run)
fprintf(output, "\n");
@@ -1376,7 +1286,7 @@ static void print_footer(struct perf_stat_config *config)
fprintf(output, " %17.*f +- %.*f seconds time elapsed",
precision, avg, precision, sd);
- print_noise_pct(config, sd, avg);
+ print_noise_pct(config, sd, avg, /*before_metric=*/false);
}
fprintf(output, "\n\n");
@@ -1393,121 +1303,127 @@ static void print_footer(struct perf_stat_config *config)
"the same PMU. Try reorganizing the group.\n");
}
-static void print_percore_thread(struct perf_stat_config *config,
- struct evsel *counter, char *prefix)
-{
- int s;
- struct aggr_cpu_id s2, id;
- struct perf_cpu_map *cpus;
- bool first = true;
- int idx;
- struct perf_cpu cpu;
-
- cpus = evsel__cpus(counter);
- perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
- s2 = config->aggr_get_id(config, cpu);
- for (s = 0; s < config->aggr_map->nr; s++) {
- id = config->aggr_map->map[s];
- if (aggr_cpu_id__equal(&s2, &id))
- break;
- }
-
- print_counter_aggrdata(config, counter, s,
- prefix, false,
- &first, cpu);
- }
-}
-
static void print_percore(struct perf_stat_config *config,
- struct evsel *counter, char *prefix)
+ struct evsel *counter, struct outstate *os)
{
bool metric_only = config->metric_only;
FILE *output = config->output;
- int s;
- bool first = true;
+ struct cpu_aggr_map *core_map;
+ int s, c, i;
if (!config->aggr_map || !config->aggr_get_id)
return;
if (config->percore_show_thread)
- return print_percore_thread(config, counter, prefix);
+ return print_counter(config, counter, os);
- for (s = 0; s < config->aggr_map->nr; s++) {
- if (prefix && metric_only)
- fprintf(output, "%s", prefix);
+ core_map = cpu_aggr_map__empty_new(config->aggr_map->nr);
+ if (core_map == NULL) {
+ fprintf(output, "Cannot allocate per-core aggr map for display\n");
+ return;
+ }
+
+ for (s = 0, c = 0; s < config->aggr_map->nr; s++) {
+ struct perf_cpu curr_cpu = config->aggr_map->map[s].cpu;
+ struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL);
+ bool found = false;
- print_counter_aggrdata(config, counter, s,
- prefix, metric_only,
- &first, (struct perf_cpu){ .cpu = -1 });
+ for (i = 0; i < c; i++) {
+ if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ continue;
+
+ print_counter_aggrdata(config, counter, s, os);
+
+ core_map->map[c++] = core_id;
}
+ free(core_map);
if (metric_only)
fputc('\n', output);
}
+static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist,
+ struct outstate *os)
+{
+ struct evsel *counter;
+
+ evlist__for_each_entry(evlist, counter) {
+ if (os->cgrp != counter->cgrp) {
+ if (os->cgrp != NULL)
+ print_metric_end(config, os);
+
+ os->cgrp = counter->cgrp;
+ print_metric_begin(config, evlist, os, /*aggr_idx=*/0);
+ }
+
+ print_counter(config, counter, os);
+ }
+ if (os->cgrp)
+ print_metric_end(config, os);
+}
+
void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
- struct target *_target, struct timespec *ts, int argc, const char **argv)
+ struct target *_target, struct timespec *ts,
+ int argc, const char **argv)
{
bool metric_only = config->metric_only;
int interval = config->interval;
struct evsel *counter;
- char buf[64], *prefix = NULL;
+ char buf[64];
+ struct outstate os = {
+ .fh = config->output,
+ .first = true,
+ };
if (config->iostat_run)
evlist->selected = evlist__first(evlist);
- if (interval)
- print_interval(config, evlist, prefix = buf, ts);
- else
- print_header(config, _target, argc, argv);
-
- if (metric_only) {
- static int num_print_iv;
-
- if (num_print_iv == 0 && !interval)
- print_metric_headers(config, evlist, prefix, false);
- if (num_print_iv++ == 25)
- num_print_iv = 0;
- if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run)
- fprintf(config->output, "%s", prefix);
-
- if (config->json_output && !config->metric_only)
- fprintf(config->output, "}");
+ if (interval) {
+ os.prefix = buf;
+ prepare_interval(config, buf, sizeof(buf), ts);
}
+ print_header(config, _target, evlist, argc, argv);
+
switch (config->aggr_mode) {
case AGGR_CORE:
case AGGR_DIE:
case AGGR_SOCKET:
case AGGR_NODE:
- print_aggr(config, evlist, prefix);
+ if (config->cgroup_list)
+ print_aggr_cgroup(config, evlist, &os);
+ else
+ print_aggr(config, evlist, &os);
break;
case AGGR_THREAD:
- evlist__for_each_entry(evlist, counter) {
- print_aggr_thread(config, _target, counter, prefix);
- }
- break;
case AGGR_GLOBAL:
- if (config->iostat_run)
- iostat_print_counters(evlist, config, ts, prefix = buf,
- print_counter_aggr);
- else {
+ if (config->iostat_run) {
+ iostat_print_counters(evlist, config, ts, buf,
+ (iostat_print_counter_t)print_counter, &os);
+ } else if (config->cgroup_list) {
+ print_cgroup_counter(config, evlist, &os);
+ } else {
+ print_metric_begin(config, evlist, &os, /*aggr_idx=*/0);
evlist__for_each_entry(evlist, counter) {
- print_counter_aggr(config, counter, prefix);
+ print_counter(config, counter, &os);
}
- if (metric_only)
- fputc('\n', config->output);
+ print_metric_end(config, &os);
}
break;
case AGGR_NONE:
if (metric_only)
- print_no_aggr_metric(config, evlist, prefix);
+ print_no_aggr_metric(config, evlist, &os);
else {
evlist__for_each_entry(evlist, counter) {
if (counter->percore)
- print_percore(config, counter, prefix);
+ print_percore(config, counter, &os);
else
- print_counter(config, counter, prefix);
+ print_counter(config, counter, &os);
}
}
break;
@@ -1517,8 +1433,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
break;
}
- if (!interval && !config->csv_output && !config->json_output)
- print_footer(config);
+ print_footer(config);
fflush(config->output);
}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 07b29fe272c7..9bde9224a97c 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -14,6 +14,7 @@
#include "units.h"
#include <linux/zalloc.h>
#include "iostat.h"
+#include "util/hashmap.h"
/*
* AGGR_GLOBAL: Use CPU 0
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 8ec8bb4a9912..673f017a211f 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -14,11 +14,7 @@
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
#include "util/hashmap.h"
-#endif
#include <linux/zalloc.h>
void update_stats(struct stats *stats, u64 val)
@@ -130,18 +126,65 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
}
}
+static void evsel__reset_aggr_stats(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ struct perf_stat_aggr *aggr = ps->aggr;
+
+ if (aggr)
+ memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr);
+}
+
static void evsel__reset_stat_priv(struct evsel *evsel)
{
struct perf_stat_evsel *ps = evsel->stats;
init_stats(&ps->res_stats);
+ evsel__reset_aggr_stats(evsel);
+}
+
+static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ if (ps == NULL)
+ return 0;
+
+ ps->nr_aggr = nr_aggr;
+ ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr));
+ if (ps->aggr == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__alloc_aggr_stats(evsel, nr_aggr) < 0)
+ return -1;
+ }
+ return 0;
}
-static int evsel__alloc_stat_priv(struct evsel *evsel)
+static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr)
{
- evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
- if (evsel->stats == NULL)
+ struct perf_stat_evsel *ps;
+
+ ps = zalloc(sizeof(*ps));
+ if (ps == NULL)
return -ENOMEM;
+
+ evsel->stats = ps;
+
+ if (nr_aggr && evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) {
+ evsel->stats = NULL;
+ free(ps);
+ return -ENOMEM;
+ }
+
perf_stat_evsel_id_init(evsel);
evsel__reset_stat_priv(evsel);
return 0;
@@ -151,8 +194,10 @@ static void evsel__free_stat_priv(struct evsel *evsel)
{
struct perf_stat_evsel *ps = evsel->stats;
- if (ps)
+ if (ps) {
+ zfree(&ps->aggr);
zfree(&ps->group_data);
+ }
zfree(&evsel->stats);
}
@@ -181,9 +226,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel)
perf_counts__reset(evsel->prev_raw_counts);
}
-static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
+static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw)
{
- if (evsel__alloc_stat_priv(evsel) < 0 ||
+ if (evsel__alloc_stat_priv(evsel, nr_aggr) < 0 ||
evsel__alloc_counts(evsel) < 0 ||
(alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
return -ENOMEM;
@@ -191,12 +236,17 @@ static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
return 0;
}
-int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
+int evlist__alloc_stats(struct perf_stat_config *config,
+ struct evlist *evlist, bool alloc_raw)
{
struct evsel *evsel;
+ int nr_aggr = 0;
+
+ if (config && config->aggr_map)
+ nr_aggr = config->aggr_map->nr;
evlist__for_each_entry(evlist, evsel) {
- if (evsel__alloc_stats(evsel, alloc_raw))
+ if (evsel__alloc_stats(evsel, nr_aggr, alloc_raw))
goto out_free;
}
@@ -228,6 +278,14 @@ void evlist__reset_stats(struct evlist *evlist)
}
}
+void evlist__reset_aggr_stats(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__reset_aggr_stats(evsel);
+}
+
void evlist__reset_prev_raw_counts(struct evlist *evlist)
{
struct evsel *evsel;
@@ -246,8 +304,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel)
*perf_counts(evsel->prev_raw_counts, idx, thread);
}
}
-
- evsel->counts->aggr = evsel->prev_raw_counts->aggr;
}
void evlist__copy_prev_raw_counts(struct evlist *evlist)
@@ -258,26 +314,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist)
evsel__copy_prev_raw_counts(evsel);
}
-void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
-{
- struct evsel *evsel;
-
- /*
- * To collect the overall statistics for interval mode,
- * we copy the counts from evsel->prev_raw_counts to
- * evsel->counts. The perf_stat_process_counter creates
- * aggr values from per cpu values, but the per cpu values
- * are 0 for AGGR_GLOBAL. So we use a trick that saves the
- * previous aggr value to the first member of perf_counts,
- * then aggr calculation in process_counter_values can work
- * correctly.
- */
- evlist__for_each_entry(evlist, evsel) {
- *perf_counts(evsel->prev_raw_counts, 0, 0) =
- evsel->prev_raw_counts->aggr;
- }
-}
-
static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
{
uint64_t *key = (uint64_t *) __key;
@@ -356,12 +392,31 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
return ret;
}
+static bool evsel__count_has_error(struct evsel *evsel,
+ struct perf_counts_values *count,
+ struct perf_stat_config *config)
+{
+ /* the evsel was failed already */
+ if (evsel->err || evsel->counts->scaled == -1)
+ return true;
+
+ /* this is meaningful for CPU aggregation modes only */
+ if (config->aggr_mode == AGGR_GLOBAL)
+ return false;
+
+ /* it's considered ok when it actually ran */
+ if (count->ena != 0 && count->run != 0)
+ return false;
+
+ return true;
+}
+
static int
process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
int cpu_map_idx, int thread,
struct perf_counts_values *count)
{
- struct perf_counts_values *aggr = &evsel->counts->aggr;
+ struct perf_stat_evsel *ps = evsel->stats;
static struct perf_counts_values zero;
bool skip = false;
@@ -373,34 +428,60 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
if (skip)
count = &zero;
- switch (config->aggr_mode) {
- case AGGR_THREAD:
- case AGGR_CORE:
- case AGGR_DIE:
- case AGGR_SOCKET:
- case AGGR_NODE:
- case AGGR_NONE:
- if (!evsel->snapshot)
- evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
- perf_counts_values__scale(count, config->scale, NULL);
- if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
- perf_stat__update_shadow_stats(evsel, count->val,
- cpu_map_idx, &rt_stat);
- }
+ if (!evsel->snapshot)
+ evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
+ perf_counts_values__scale(count, config->scale, NULL);
+
+ if (config->aggr_mode == AGGR_THREAD) {
+ struct perf_counts_values *aggr_counts = &ps->aggr[thread].counts;
+
+ /*
+ * Skip value 0 when enabling --per-thread globally,
+ * otherwise too many 0 output.
+ */
+ if (count->val == 0 && config->system_wide)
+ return 0;
+
+ ps->aggr[thread].nr++;
+
+ aggr_counts->val += count->val;
+ aggr_counts->ena += count->ena;
+ aggr_counts->run += count->run;
+ return 0;
+ }
- if (config->aggr_mode == AGGR_THREAD) {
- perf_stat__update_shadow_stats(evsel, count->val,
- thread, &rt_stat);
+ if (ps->aggr) {
+ struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+ struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu);
+ struct perf_stat_aggr *ps_aggr;
+ int i;
+
+ for (i = 0; i < ps->nr_aggr; i++) {
+ if (!aggr_cpu_id__equal(&aggr_id, &config->aggr_map->map[i]))
+ continue;
+
+ ps_aggr = &ps->aggr[i];
+ ps_aggr->nr++;
+
+ /*
+ * When any result is bad, make them all to give consistent output
+ * in interval mode. But per-task counters can have 0 enabled time
+ * when some tasks are idle.
+ */
+ if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) {
+ ps_aggr->counts.val = 0;
+ ps_aggr->counts.ena = 0;
+ ps_aggr->counts.run = 0;
+ ps_aggr->failed = true;
+ }
+
+ if (!ps_aggr->failed) {
+ ps_aggr->counts.val += count->val;
+ ps_aggr->counts.ena += count->ena;
+ ps_aggr->counts.run += count->run;
+ }
+ break;
}
- break;
- case AGGR_GLOBAL:
- aggr->val += count->val;
- aggr->ena += count->ena;
- aggr->run += count->run;
- case AGGR_UNSET:
- case AGGR_MAX:
- default:
- break;
}
return 0;
@@ -427,13 +508,10 @@ static int process_counter_maps(struct perf_stat_config *config,
int perf_stat_process_counter(struct perf_stat_config *config,
struct evsel *counter)
{
- struct perf_counts_values *aggr = &counter->counts->aggr;
struct perf_stat_evsel *ps = counter->stats;
- u64 *count = counter->counts->aggr.values;
+ u64 *count;
int ret;
- aggr->val = aggr->ena = aggr->run = 0;
-
if (counter->per_pkg)
evsel__zero_per_pkg(counter);
@@ -444,10 +522,11 @@ int perf_stat_process_counter(struct perf_stat_config *config,
if (config->aggr_mode != AGGR_GLOBAL)
return 0;
- if (!counter->snapshot)
- evsel__compute_deltas(counter, -1, -1, aggr);
- perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
-
+ /*
+ * GLOBAL aggregation mode only has a single aggr counts,
+ * so we can use ps->aggr[0] as the actual output.
+ */
+ count = ps->aggr[0].counts.values;
update_stats(&ps->res_stats, *count);
if (verbose > 0) {
@@ -455,13 +534,194 @@ int perf_stat_process_counter(struct perf_stat_config *config,
evsel__name(counter), count[0], count[1], count[2]);
}
- /*
- * Save the full runtime - to allow normalization during printout:
- */
- perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);
+ return 0;
+}
+
+static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
+{
+ struct perf_stat_evsel *ps_a = evsel->stats;
+ struct perf_stat_evsel *ps_b = alias->stats;
+ int i;
+
+ if (ps_a->aggr == NULL && ps_b->aggr == NULL)
+ return 0;
+
+ if (ps_a->nr_aggr != ps_b->nr_aggr) {
+ pr_err("Unmatched aggregation mode between aliases\n");
+ return -1;
+ }
+
+ for (i = 0; i < ps_a->nr_aggr; i++) {
+ struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts;
+ struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts;
+
+ /* NB: don't increase aggr.nr for aliases */
+
+ aggr_counts_a->val += aggr_counts_b->val;
+ aggr_counts_a->ena += aggr_counts_b->ena;
+ aggr_counts_a->run += aggr_counts_b->run;
+ }
return 0;
}
+/* events should have the same name, scale, unit, cgroup but on different PMUs */
+static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
+{
+ if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b)))
+ return false;
+
+ if (evsel_a->scale != evsel_b->scale)
+ return false;
+
+ if (evsel_a->cgrp != evsel_b->cgrp)
+ return false;
+
+ if (strcmp(evsel_a->unit, evsel_b->unit))
+ return false;
+
+ if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
+ return false;
+
+ return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name);
+}
+
+static void evsel__merge_aliases(struct evsel *evsel)
+{
+ struct evlist *evlist = evsel->evlist;
+ struct evsel *alias;
+
+ alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node);
+ list_for_each_entry_continue(alias, &evlist->core.entries, core.node) {
+ /* Merge the same events on different PMUs. */
+ if (evsel__is_alias(evsel, alias)) {
+ evsel__merge_aggr_counters(evsel, alias);
+ alias->merged_stat = true;
+ }
+ }
+}
+
+static bool evsel__should_merge_hybrid(const struct evsel *evsel,
+ const struct perf_stat_config *config)
+{
+ return config->hybrid_merge && evsel__is_hybrid(evsel);
+}
+
+static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config)
+{
+ /* this evsel is already merged */
+ if (evsel->merged_stat)
+ return;
+
+ if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config))
+ evsel__merge_aliases(evsel);
+}
+
+/* merge the same uncore and hybrid events if requested */
+void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (config->no_merge)
+ return;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__merge_stats(evsel, config);
+}
+
+static void evsel__update_percore_stats(struct evsel *evsel, struct aggr_cpu_id *core_id)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ struct perf_counts_values counts = { 0, };
+ struct aggr_cpu_id id;
+ struct perf_cpu cpu;
+ int idx;
+
+ /* collect per-core counts */
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+ struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+ id = aggr_cpu_id__core(cpu, NULL);
+ if (!aggr_cpu_id__equal(core_id, &id))
+ continue;
+
+ counts.val += aggr->counts.val;
+ counts.ena += aggr->counts.ena;
+ counts.run += aggr->counts.run;
+ }
+
+ /* update aggregated per-core counts for each CPU */
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+ struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+ id = aggr_cpu_id__core(cpu, NULL);
+ if (!aggr_cpu_id__equal(core_id, &id))
+ continue;
+
+ aggr->counts.val = counts.val;
+ aggr->counts.ena = counts.ena;
+ aggr->counts.run = counts.run;
+
+ aggr->used = true;
+ }
+}
+
+/* we have an aggr_map for cpu, but want to aggregate the counters per-core */
+static void evsel__process_percore(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ struct aggr_cpu_id core_id;
+ struct perf_cpu cpu;
+ int idx;
+
+ if (!evsel->percore)
+ return;
+
+ perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+ struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+ if (aggr->used)
+ continue;
+
+ core_id = aggr_cpu_id__core(cpu, NULL);
+ evsel__update_percore_stats(evsel, &core_id);
+ }
+}
+
+/* process cpu stats on per-core events */
+void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ if (config->aggr_mode != AGGR_NONE)
+ return;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__process_percore(evsel);
+}
+
+static void evsel__update_shadow_stats(struct evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ int i;
+
+ if (ps->aggr == NULL)
+ return;
+
+ for (i = 0; i < ps->nr_aggr; i++) {
+ struct perf_counts_values *aggr_counts = &ps->aggr[i].counts;
+
+ perf_stat__update_shadow_stats(evsel, aggr_counts->val, i, &rt_stat);
+ }
+}
+
+void perf_stat_process_shadow_stats(struct perf_stat_config *config __maybe_unused,
+ struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__update_shadow_stats(evsel);
+}
int perf_event__process_stat_event(struct perf_session *session,
union perf_event *event)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index b0899c6e002f..499c3bf81333 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -8,6 +8,7 @@
#include <sys/resource.h>
#include "cpumap.h"
#include "rblist.h"
+#include "counts.h"
struct perf_cpu_map;
struct perf_stat_config;
@@ -42,9 +43,29 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__MAX,
};
+/* hold aggregated event info */
+struct perf_stat_aggr {
+ /* aggregated values */
+ struct perf_counts_values counts;
+ /* number of entries (CPUs) aggregated */
+ int nr;
+ /* whether any entry has failed to read/process event */
+ bool failed;
+ /* to mark this data is processed already */
+ bool used;
+};
+
+/* per-evsel event stats */
struct perf_stat_evsel {
+ /* used for repeated runs */
struct stats res_stats;
+ /* evsel id for quick check */
enum perf_stat_evsel_id id;
+ /* number of allocated 'aggr' */
+ int nr_aggr;
+ /* aggregated event values */
+ struct perf_stat_aggr *aggr;
+ /* used for group read */
u64 *group_data;
};
@@ -139,7 +160,6 @@ struct perf_stat_config {
bool metric_no_group;
bool metric_no_merge;
bool stop_read_counter;
- bool quiet;
bool iostat_run;
char *user_requested_cpu_list;
bool system_wide;
@@ -203,15 +223,6 @@ static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rus
struct evsel;
struct evlist;
-struct perf_aggr_thread_value {
- struct evsel *counter;
- struct aggr_cpu_id id;
- double uval;
- u64 val;
- u64 run;
- u64 ena;
-};
-
bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
#define perf_stat_evsel__is(evsel, id) \
@@ -248,15 +259,23 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct runtime_stat *st);
void perf_stat__collect_metric_expr(struct evlist *);
-int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
+int evlist__alloc_stats(struct perf_stat_config *config,
+ struct evlist *evlist, bool alloc_raw);
void evlist__free_stats(struct evlist *evlist);
void evlist__reset_stats(struct evlist *evlist);
void evlist__reset_prev_raw_counts(struct evlist *evlist);
void evlist__copy_prev_raw_counts(struct evlist *evlist);
void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
+int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr);
+void evlist__reset_aggr_stats(struct evlist *evlist);
+
int perf_stat_process_counter(struct perf_stat_config *config,
struct evsel *counter);
+void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist);
+void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist);
+void perf_stat_process_shadow_stats(struct perf_stat_config *config, struct evlist *evlist);
+
struct perf_tool;
union perf_event;
struct perf_session;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 647b7dff8ef3..80345695b136 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1303,7 +1303,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
(!used_opd && syms_ss->adjust_symbols)) {
GElf_Phdr phdr;
- if (elf_read_program_header(syms_ss->elf,
+ if (elf_read_program_header(runtime_ss->elf,
(u64)sym.st_value, &phdr)) {
pr_debug4("%s: failed to find program header for "
"symbol: %s st_value: %#" PRIx64 "\n",
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0b893dcc8ea6..e297de14184c 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -132,6 +132,8 @@ struct addr_location {
s32 socket;
};
+void addr_location__put(struct addr_location *al);
+
int dso__load(struct dso *dso, struct map *map);
int dso__load_vmlinux(struct dso *dso, struct map *map,
const char *vmlinux, bool vmlinux_allocated);
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index cccd293b5312..0645795ff080 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -2218,8 +2218,9 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16
len = pos->long_name_len + 1;
len = PERF_ALIGN(len, NAME_ALIGN);
memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data));
+ ev.build_id.size = pos->bid.size;
ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
- ev.build_id.header.misc = misc;
+ ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE;
ev.build_id.pid = machine->pid;
ev.build_id.header.size = sizeof(ev.build_id) + len;
memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 241f300d7d6e..395c626699a9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -158,4 +158,7 @@ static inline bool thread__is_filtered(struct thread *thread)
void thread__free_stitch_list(struct thread *thread);
+void thread__resolve(struct thread *thread, struct addr_location *al,
+ struct perf_sample *sample);
+
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index c9bfe4696943..e848579e61a8 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -18,6 +18,7 @@
#include "thread_map.h"
#include "debug.h"
#include "event.h"
+#include <internal/threadmap.h>
/* Skip "." and ".." directories */
static int filter(const struct dirent *dir)
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 3bb860a32b8e..00ec05fc1656 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -4,8 +4,6 @@
#include <sys/types.h>
#include <stdio.h>
-#include <linux/refcount.h>
-#include <internal/threadmap.h>
#include <perf/threadmap.h>
struct perf_record_thread_map;
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 7172ca05265f..636a010d929b 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -12,9 +12,9 @@
#include "debug.h"
#include "trace-event.h"
-#include "event.h"
#include "evsel.h"
#include <linux/zalloc.h>
+#include "util/sample.h"
struct scripting_context *scripting_context;