diff options
139 files changed, 3332 insertions, 421 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index fb2a3633b719..ab2edbbae480 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14017,8 +14017,10 @@ R: Mark Rutland <mark.rutland@arm.com> R: Alexander Shishkin <alexander.shishkin@linux.intel.com> R: Jiri Olsa <jolsa@redhat.com> R: Namhyung Kim <namhyung@kernel.org> +L: linux-perf-users@vger.kernel.org L: linux-kernel@vger.kernel.org S: Supported +W: https://perf.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core F: arch/*/events/* F: arch/*/events/*/* diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index e9c5a215837d..6e14a533ab4e 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -61,6 +61,9 @@ typedef __u32 __bitwise __be32; typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + typedef struct { int counter; } atomic_t; diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index bb167e32a1d7..cd8ce6e8ec12 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -57,7 +57,7 @@ OPTIONS -u:: --update=:: Update specified file of the cache. Note that this doesn't remove - older entires since those may be still needed for annotating old + older entries since those may be still needed for annotating old (or remote) perf.data. Only if there is already a cache which has exactly same build-id, that is replaced by new one. It can be used to update kallsyms and kernel dso to vmlinux in order to support diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 153bde14bbe0..154a1ced72b2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -393,6 +393,12 @@ annotate.*:: This option works with tui, stdio2 browsers. + annotate.demangle:: + Demangle symbol names to human readable form. Default is 'true'. + + annotate.demangle_kernel:: + Demangle kernel symbol names to human readable form. Default is 'true'. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f546b5e9db05..f51f0000676e 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -112,6 +112,8 @@ OPTIONS - ins_lat: Instruction latency in core cycles. This is the global instruction latency - local_ins_lat: Local instruction latency version + - p_stage_cyc: On powerpc, this presents the number of cycles spent in a + pipeline stage. And currently supported only on powerpc. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) @@ -224,6 +226,9 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. +--disable-order:: + Disable raw trace ordering. + -g:: --call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>:: Display call chains using type, min percent threshold, print limit, @@ -472,7 +477,7 @@ OPTIONS but probably we'll make the default not to show the switch-on/off events on the --group mode and if there is only one event besides the off/on ones, go straight to the histogram browser, just like 'perf report' with no events - explicitely specified does. + explicitly specified does. --itrace:: Options for decoding instruction tracing data. The options are: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 08a1714494f8..6ec5960b08c3 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -93,6 +93,17 @@ report:: 1.102235068 seconds time elapsed +--bpf-counters:: + Use BPF programs to aggregate readings from perf_events. This + allows multiple perf-stat sessions that are counting the same metric (cycles, + instructions, etc.) to share hardware counters. + +--bpf-attr-map:: + With option "--bpf-counters", different perf-stat sessions share + information about shared BPF programs and maps via a pinned hashmap. + Use "--bpf-attr-map" to specify the path of this pinned hashmap. + The default path is /sys/fs/bpf/perf_attr_map. + ifdef::HAVE_LIBPFM[] --pfm-events events:: Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) @@ -142,7 +153,10 @@ Do not aggregate counts across all monitored CPUs. -n:: --null:: - null run - don't start any counters +null run - Don't start any counters. + +This can be useful to measure just elapsed wall-clock time - or to assess the +raw overhead of perf stat itself, without running any counters. -v:: --verbose:: @@ -468,6 +482,15 @@ convenient for post processing. --summary:: Print summary for interval mode (-I). +--no-csv-summary:: +Don't print 'summary' at the first column for CVS summary output. +This option must be used with -x and --summary. + +This option can be enabled in perf config by setting the variable +'stat.no-csv-summary'. + +$ perf config stat.no-csv-summary=true + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index ee2024691d46..bba5ffb05463 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -317,7 +317,7 @@ Default is to monitor all CPUS. but probably we'll make the default not to show the switch-on/off events on the --group mode and if there is only one event besides the off/on ones, go straight to the histogram browser, just like 'perf top' with no events - explicitely specified does. + explicitly specified does. --stitch-lbr:: Show callgraph with stitched LBRs, which may have more complete diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index c130a3c46a90..9c330cdfa973 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -76,3 +76,15 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-list[1] + +linkperf:perf-annotate[1],linkperf:perf-archive[1], +linkperf:perf-bench[1], linkperf:perf-buildid-cache[1], +linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], +linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], +linkperf:perf-evlist[1], linkperf:perf-ftrace[1], +linkperf:perf-help[1], linkperf:perf-inject[1], +linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], +linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], +linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], +linkperf:perf-script[1], linkperf:perf-test[1], +linkperf:perf-trace[1], linkperf:perf-version[1] diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d8e59d31399a..3514fe956ed1 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -32,7 +32,7 @@ ifneq ($(NO_SYSCALL_TABLE),1) NO_SYSCALL_TABLE := 0 endif else - ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips)) NO_SYSCALL_TABLE := 0 endif endif @@ -87,6 +87,13 @@ ifeq ($(ARCH),s390) CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif +ifeq ($(ARCH),mips) + NO_PERF_REGS := 0 + CFLAGS += -I$(OUTPUT)arch/mips/include/generated + CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi + LIBUNWIND_LIBS = -lunwind -lunwind-mips +endif + ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index f6e609673de2..090fb9d62665 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1007,6 +1007,7 @@ python-clean: SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h +SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool @@ -1021,7 +1022,7 @@ $(BPFTOOL): | $(SKEL_TMP_OUT) OUTPUT=$(SKEL_TMP_OUT)/ bootstrap $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) - $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \ + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index c25c878fd06c..d942f118d32c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -67,6 +67,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, char path[PATH_MAX]; int err = -EINVAL; u32 val; + u64 contextid; ptr = container_of(itr, struct cs_etm_recording, itr); cs_etm_pmu = ptr->cs_etm_pmu; @@ -86,25 +87,59 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, goto out; } + /* User has configured for PID tracing, respects it. */ + contextid = evsel->core.attr.config & + (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2)); + /* - * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing - * is supported: - * 0b00000 Context ID tracing is not supported. - * 0b00100 Maximum of 32-bit Context ID size. - * All other values are reserved. + * If user doesn't configure the contextid format, parse PMU format and + * enable PID tracing according to the "contextid" format bits: + * + * If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1; + * If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2. */ - val = BMVAL(val, 5, 9); - if (!val || val != 0x4) { - err = -EINVAL; - goto out; + if (!contextid) + contextid = perf_pmu__format_bits(&cs_etm_pmu->format, + "contextid"); + + if (contextid & BIT(ETM_OPT_CTXTID)) { + /* + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID + * tracing is supported: + * 0b00000 Context ID tracing is not supported. + * 0b00100 Maximum of 32-bit Context ID size. + * All other values are reserved. + */ + val = BMVAL(val, 5, 9); + if (!val || val != 0x4) { + pr_err("%s: CONTEXTIDR_EL1 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } + } + + if (contextid & BIT(ETM_OPT_CTXTID2)) { + /* + * TRCIDR2.VMIDOPT[30:29] != 0 and + * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid) + * We can't support CONTEXTIDR in VMID if the size of the + * virtual context id is < 32bit. + * Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us. + */ + if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) { + pr_err("%s: CONTEXTIDR_EL2 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } } /* All good, let the kernel know */ - evsel->core.attr.config |= (1 << ETM_OPT_CTXTID); + evsel->core.attr.config |= contextid; err = 0; out: - return err; } @@ -173,17 +208,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_SET_OPT_CTXTID) { + if (option & BIT(ETM_OPT_CTXTID)) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_SET_OPT_TS) { + if (option & BIT(ETM_OPT_TS)) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_SET_OPT_MASK)) + if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS))) /* Nothing else is currently supported */ goto out; } @@ -343,7 +378,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); } - /* Snapshost size can't be bigger than the auxtrace area */ + /* Snapshot size can't be bigger than the auxtrace area */ if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", @@ -410,7 +445,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS); + BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); if (err) goto out; } @@ -489,7 +524,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) config |= BIT(ETM4_CFG_BIT_TS); if (config_opts & BIT(ETM_OPT_RETSTK)) config |= BIT(ETM4_CFG_BIT_RETSTK); - + if (config_opts & BIT(ETM_OPT_CTXTID2)) + config |= BIT(ETM4_CFG_BIT_VMID) | + BIT(ETM4_CFG_BIT_VMID_OPT); return config; } @@ -576,7 +613,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) { - u32 increment; + u32 increment, nr_trc_params; u64 magic; struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); @@ -611,6 +648,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETMV4_PRIV_MAX; + nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR; } else { magic = __perf_cs_etmv3_magic; /* Get configuration register */ @@ -628,11 +666,13 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETM_PRIV_MAX; + nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR; } /* Build generic header portion */ info->priv[*offset + CS_ETM_MAGIC] = magic; info->priv[*offset + CS_ETM_CPU] = cpu; + info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params; /* Where the next CPU entry should start from */ *offset += increment; } @@ -678,7 +718,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, /* First fill out the session header */ info->type = PERF_AUXTRACE_CS_ETM; - info->priv[CS_HEADER_VERSION_0] = 0; + info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION; info->priv[CS_PMU_TYPE_CPUS] = type << 32; info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode; diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 40c5e0b5bda8..7e7714290a87 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -6,11 +6,11 @@ #include "debug.h" #include "symbol.h" -/* On arm64, kernel text segment start at high memory address, +/* On arm64, kernel text segment starts at high memory address, * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory - * address, like 0xffff 0000 00ax xxxx. When only samll amount of + * address, like 0xffff 0000 00ax xxxx. When only small amount of * memory is used by modules, gap between end of module's text segment - * and start of kernel text segment may be reach 2G. + * and start of kernel text segment may reach 2G. * Therefore do not fill this gap and do not assign it to the kernel dso map. */ diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2518cde18b34..476b037eea1c 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -108,7 +108,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) /* [sp], [sp, NUM] or [sp,NUM] */ new_len = 7; /* + ( % s p ) NULL */ - /* If the arugment is [sp], need to fill offset '0' */ + /* If the argument is [sp], need to fill offset '0' */ if (rm[2].rm_so == -1) new_len += 1; else diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile new file mode 100644 index 000000000000..8bc09072e3d6 --- /dev/null +++ b/tools/perf/arch/mips/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif + +# Syscall table generation for perf +out := $(OUTPUT)arch/mips/include/generated/asm +header := $(out)/syscalls_n64.c +sysprf := $(srctree)/tools/perf/arch/mips/entry/syscalls +sysdef := $(sysprf)/syscall_n64.tbl +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, mips) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl new file mode 100644 index 000000000000..fb1f49451af6 --- /dev/null +++ b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# s390 script. +# +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Tiezhu Yang <yangtiezhu@loongson.cn> + +SYSCALL_TBL=$1 + +if ! test -r $SYSCALL_TBL; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local max_nr nr abi sc discard + + echo 'static const char *syscalltbl_mips_n64[] = {' + while read nr abi sc discard; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_MIPS_N64_MAX_ID $max_nr" +} + +grep -E "^[[:digit:]]+[[:space:]]+(n64)" $SYSCALL_TBL \ + |sort -k1 -n \ + |create_table diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl new file mode 100644 index 000000000000..91649690b52f --- /dev/null +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -0,0 +1,358 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# system call numbers and entry vectors for mips +# +# The format is: +# <number> <abi> <name> <entry point> +# +# The <abi> is always "n64" for this file. +# +0 n64 read sys_read +1 n64 write sys_write +2 n64 open sys_open +3 n64 close sys_close +4 n64 stat sys_newstat +5 n64 fstat sys_newfstat +6 n64 lstat sys_newlstat +7 n64 poll sys_poll +8 n64 lseek sys_lseek +9 n64 mmap sys_mips_mmap +10 n64 mprotect sys_mprotect +11 n64 munmap sys_munmap +12 n64 brk sys_brk +13 n64 rt_sigaction sys_rt_sigaction +14 n64 rt_sigprocmask sys_rt_sigprocmask +15 n64 ioctl sys_ioctl +16 n64 pread64 sys_pread64 +17 n64 pwrite64 sys_pwrite64 +18 n64 readv sys_readv +19 n64 writev sys_writev +20 n64 access sys_access +21 n64 pipe sysm_pipe +22 n64 _newselect sys_select +23 n64 sched_yield sys_sched_yield +24 n64 mremap sys_mremap +25 n64 msync sys_msync +26 n64 mincore sys_mincore +27 n64 madvise sys_madvise +28 n64 shmget sys_shmget +29 n64 shmat sys_shmat +30 n64 shmctl sys_old_shmctl +31 n64 dup sys_dup +32 n64 dup2 sys_dup2 +33 n64 pause sys_pause +34 n64 nanosleep sys_nanosleep +35 n64 getitimer sys_getitimer +36 n64 setitimer sys_setitimer +37 n64 alarm sys_alarm +38 n64 getpid sys_getpid +39 n64 sendfile sys_sendfile64 +40 n64 socket sys_socket +41 n64 connect sys_connect +42 n64 accept sys_accept +43 n64 sendto sys_sendto +44 n64 recvfrom sys_recvfrom +45 n64 sendmsg sys_sendmsg +46 n64 recvmsg sys_recvmsg +47 n64 shutdown sys_shutdown +48 n64 bind sys_bind +49 n64 listen sys_listen +50 n64 getsockname sys_getsockname +51 n64 getpeername sys_getpeername +52 n64 socketpair sys_socketpair +53 n64 setsockopt sys_setsockopt +54 n64 getsockopt sys_getsockopt +55 n64 clone __sys_clone +56 n64 fork __sys_fork +57 n64 execve sys_execve +58 n64 exit sys_exit +59 n64 wait4 sys_wait4 +60 n64 kill sys_kill +61 n64 uname sys_newuname +62 n64 semget sys_semget +63 n64 semop sys_semop +64 n64 semctl sys_old_semctl +65 n64 shmdt sys_shmdt +66 n64 msgget sys_msgget +67 n64 msgsnd sys_msgsnd +68 n64 msgrcv sys_msgrcv +69 n64 msgctl sys_old_msgctl +70 n64 fcntl sys_fcntl +71 n64 flock sys_flock +72 n64 fsync sys_fsync +73 n64 fdatasync sys_fdatasync +74 n64 truncate sys_truncate +75 n64 ftruncate sys_ftruncate +76 n64 getdents sys_getdents +77 n64 getcwd sys_getcwd +78 n64 chdir sys_chdir +79 n64 fchdir sys_fchdir +80 n64 rename sys_rename +81 n64 mkdir sys_mkdir +82 n64 rmdir sys_rmdir +83 n64 creat sys_creat +84 n64 link sys_link +85 n64 unlink sys_unlink +86 n64 symlink sys_symlink +87 n64 readlink sys_readlink +88 n64 chmod sys_chmod +89 n64 fchmod sys_fchmod +90 n64 chown sys_chown +91 n64 fchown sys_fchown +92 n64 lchown sys_lchown +93 n64 umask sys_umask +94 n64 gettimeofday sys_gettimeofday +95 n64 getrlimit sys_getrlimit +96 n64 getrusage sys_getrusage +97 n64 sysinfo sys_sysinfo +98 n64 times sys_times +99 n64 ptrace sys_ptrace +100 n64 getuid sys_getuid +101 n64 syslog sys_syslog +102 n64 getgid sys_getgid +103 n64 setuid sys_setuid +104 n64 setgid sys_setgid +105 n64 geteuid sys_geteuid +106 n64 getegid sys_getegid +107 n64 setpgid sys_setpgid +108 n64 getppid sys_getppid +109 n64 getpgrp sys_getpgrp +110 n64 setsid sys_setsid +111 n64 setreuid sys_setreuid +112 n64 setregid sys_setregid +113 n64 getgroups sys_getgroups +114 n64 setgroups sys_setgroups +115 n64 setresuid sys_setresuid +116 n64 getresuid sys_getresuid +117 n64 setresgid sys_setresgid +118 n64 getresgid sys_getresgid +119 n64 getpgid sys_getpgid +120 n64 setfsuid sys_setfsuid +121 n64 setfsgid sys_setfsgid +122 n64 getsid sys_getsid +123 n64 capget sys_capget +124 n64 capset sys_capset +125 n64 rt_sigpending sys_rt_sigpending +126 n64 rt_sigtimedwait sys_rt_sigtimedwait +127 n64 rt_sigqueueinfo sys_rt_sigqueueinfo +128 n64 rt_sigsuspend sys_rt_sigsuspend +129 n64 sigaltstack sys_sigaltstack +130 n64 utime sys_utime +131 n64 mknod sys_mknod +132 n64 personality sys_personality +133 n64 ustat sys_ustat +134 n64 statfs sys_statfs +135 n64 fstatfs sys_fstatfs +136 n64 sysfs sys_sysfs +137 n64 getpriority sys_getpriority +138 n64 setpriority sys_setpriority +139 n64 sched_setparam sys_sched_setparam +140 n64 sched_getparam sys_sched_getparam +141 n64 sched_setscheduler sys_sched_setscheduler +142 n64 sched_getscheduler sys_sched_getscheduler +143 n64 sched_get_priority_max sys_sched_get_priority_max +144 n64 sched_get_priority_min sys_sched_get_priority_min +145 n64 sched_rr_get_interval sys_sched_rr_get_interval +146 n64 mlock sys_mlock +147 n64 munlock sys_munlock +148 n64 mlockall sys_mlockall +149 n64 munlockall sys_munlockall +150 n64 vhangup sys_vhangup +151 n64 pivot_root sys_pivot_root +152 n64 _sysctl sys_ni_syscall +153 n64 prctl sys_prctl +154 n64 adjtimex sys_adjtimex +155 n64 setrlimit sys_setrlimit +156 n64 chroot sys_chroot +157 n64 sync sys_sync +158 n64 acct sys_acct +159 n64 settimeofday sys_settimeofday +160 n64 mount sys_mount +161 n64 umount2 sys_umount +162 n64 swapon sys_swapon +163 n64 swapoff sys_swapoff +164 n64 reboot sys_reboot +165 n64 sethostname sys_sethostname +166 n64 setdomainname sys_setdomainname +167 n64 create_module sys_ni_syscall +168 n64 init_module sys_init_module +169 n64 delete_module sys_delete_module +170 n64 get_kernel_syms sys_ni_syscall +171 n64 query_module sys_ni_syscall +172 n64 quotactl sys_quotactl +173 n64 nfsservctl sys_ni_syscall +174 n64 getpmsg sys_ni_syscall +175 n64 putpmsg sys_ni_syscall +176 n64 afs_syscall sys_ni_syscall +# 177 reserved for security +177 n64 reserved177 sys_ni_syscall +178 n64 gettid sys_gettid +179 n64 readahead sys_readahead +180 n64 setxattr sys_setxattr +181 n64 lsetxattr sys_lsetxattr +182 n64 fsetxattr sys_fsetxattr +183 n64 getxattr sys_getxattr +184 n64 lgetxattr sys_lgetxattr +185 n64 fgetxattr sys_fgetxattr +186 n64 listxattr sys_listxattr +187 n64 llistxattr sys_llistxattr +188 n64 flistxattr sys_flistxattr +189 n64 removexattr sys_removexattr +190 n64 lremovexattr sys_lremovexattr +191 n64 fremovexattr sys_fremovexattr +192 n64 tkill sys_tkill +193 n64 reserved193 sys_ni_syscall +194 n64 futex sys_futex +195 n64 sched_setaffinity sys_sched_setaffinity +196 n64 sched_getaffinity sys_sched_getaffinity +197 n64 cacheflush sys_cacheflush +198 n64 cachectl sys_cachectl +199 n64 sysmips __sys_sysmips +200 n64 io_setup sys_io_setup +201 n64 io_destroy sys_io_destroy +202 n64 io_getevents sys_io_getevents +203 n64 io_submit sys_io_submit +204 n64 io_cancel sys_io_cancel +205 n64 exit_group sys_exit_group +206 n64 lookup_dcookie sys_lookup_dcookie +207 n64 epoll_create sys_epoll_create +208 n64 epoll_ctl sys_epoll_ctl +209 n64 epoll_wait sys_epoll_wait +210 n64 remap_file_pages sys_remap_file_pages +211 n64 rt_sigreturn sys_rt_sigreturn +212 n64 set_tid_address sys_set_tid_address +213 n64 restart_syscall sys_restart_syscall +214 n64 semtimedop sys_semtimedop +215 n64 fadvise64 sys_fadvise64_64 +216 n64 timer_create sys_timer_create +217 n64 timer_settime sys_timer_settime +218 n64 timer_gettime sys_timer_gettime +219 n64 timer_getoverrun sys_timer_getoverrun +220 n64 timer_delete sys_timer_delete +221 n64 clock_settime sys_clock_settime +222 n64 clock_gettime sys_clock_gettime +223 n64 clock_getres sys_clock_getres +224 n64 clock_nanosleep sys_clock_nanosleep +225 n64 tgkill sys_tgkill +226 n64 utimes sys_utimes +227 n64 mbind sys_mbind +228 n64 get_mempolicy sys_get_mempolicy +229 n64 set_mempolicy sys_set_mempolicy +230 n64 mq_open sys_mq_open +231 n64 mq_unlink sys_mq_unlink +232 n64 mq_timedsend sys_mq_timedsend +233 n64 mq_timedreceive sys_mq_timedreceive +234 n64 mq_notify sys_mq_notify +235 n64 mq_getsetattr sys_mq_getsetattr +236 n64 vserver sys_ni_syscall +237 n64 waitid sys_waitid +# 238 was sys_setaltroot +239 n64 add_key sys_add_key +240 n64 request_key sys_request_key +241 n64 keyctl sys_keyctl +242 n64 set_thread_area sys_set_thread_area +243 n64 inotify_init sys_inotify_init +244 n64 inotify_add_watch sys_inotify_add_watch +245 n64 inotify_rm_watch sys_inotify_rm_watch +246 n64 migrate_pages sys_migrate_pages +247 n64 openat sys_openat +248 n64 mkdirat sys_mkdirat +249 n64 mknodat sys_mknodat +250 n64 fchownat sys_fchownat +251 n64 futimesat sys_futimesat +252 n64 newfstatat sys_newfstatat +253 n64 unlinkat sys_unlinkat +254 n64 renameat sys_renameat +255 n64 linkat sys_linkat +256 n64 symlinkat sys_symlinkat +257 n64 readlinkat sys_readlinkat +258 n64 fchmodat sys_fchmodat +259 n64 faccessat sys_faccessat +260 n64 pselect6 sys_pselect6 +261 n64 ppoll sys_ppoll +262 n64 unshare sys_unshare +263 n64 splice sys_splice +264 n64 sync_file_range sys_sync_file_range +265 n64 tee sys_tee +266 n64 vmsplice sys_vmsplice +267 n64 move_pages sys_move_pages +268 n64 set_robust_list sys_set_robust_list +269 n64 get_robust_list sys_get_robust_list +270 n64 kexec_load sys_kexec_load +271 n64 getcpu sys_getcpu +272 n64 epoll_pwait sys_epoll_pwait +273 n64 ioprio_set sys_ioprio_set +274 n64 ioprio_get sys_ioprio_get +275 n64 utimensat sys_utimensat +276 n64 signalfd sys_signalfd +277 n64 timerfd sys_ni_syscall +278 n64 eventfd sys_eventfd +279 n64 fallocate sys_fallocate +280 n64 timerfd_create sys_timerfd_create +281 n64 timerfd_gettime sys_timerfd_gettime +282 n64 timerfd_settime sys_timerfd_settime +283 n64 signalfd4 sys_signalfd4 +284 n64 eventfd2 sys_eventfd2 +285 n64 epoll_create1 sys_epoll_create1 +286 n64 dup3 sys_dup3 +287 n64 pipe2 sys_pipe2 +288 n64 inotify_init1 sys_inotify_init1 +289 n64 preadv sys_preadv +290 n64 pwritev sys_pwritev +291 n64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +292 n64 perf_event_open sys_perf_event_open +293 n64 accept4 sys_accept4 +294 n64 recvmmsg sys_recvmmsg +295 n64 fanotify_init sys_fanotify_init +296 n64 fanotify_mark sys_fanotify_mark +297 n64 prlimit64 sys_prlimit64 +298 n64 name_to_handle_at sys_name_to_handle_at +299 n64 open_by_handle_at sys_open_by_handle_at +300 n64 clock_adjtime sys_clock_adjtime +301 n64 syncfs sys_syncfs +302 n64 sendmmsg sys_sendmmsg +303 n64 setns sys_setns +304 n64 process_vm_readv sys_process_vm_readv +305 n64 process_vm_writev sys_process_vm_writev +306 n64 kcmp sys_kcmp +307 n64 finit_module sys_finit_module +308 n64 getdents64 sys_getdents64 +309 n64 sched_setattr sys_sched_setattr +310 n64 sched_getattr sys_sched_getattr +311 n64 renameat2 sys_renameat2 +312 n64 seccomp sys_seccomp +313 n64 getrandom sys_getrandom +314 n64 memfd_create sys_memfd_create +315 n64 bpf sys_bpf +316 n64 execveat sys_execveat +317 n64 userfaultfd sys_userfaultfd +318 n64 membarrier sys_membarrier +319 n64 mlock2 sys_mlock2 +320 n64 copy_file_range sys_copy_file_range +321 n64 preadv2 sys_preadv2 +322 n64 pwritev2 sys_pwritev2 +323 n64 pkey_mprotect sys_pkey_mprotect +324 n64 pkey_alloc sys_pkey_alloc +325 n64 pkey_free sys_pkey_free +326 n64 statx sys_statx +327 n64 rseq sys_rseq +328 n64 io_pgetevents sys_io_pgetevents +# 329 through 423 are reserved to sync up with other architectures +424 n64 pidfd_send_signal sys_pidfd_send_signal +425 n64 io_uring_setup sys_io_uring_setup +426 n64 io_uring_enter sys_io_uring_enter +427 n64 io_uring_register sys_io_uring_register +428 n64 open_tree sys_open_tree +429 n64 move_mount sys_move_mount +430 n64 fsopen sys_fsopen +431 n64 fsconfig sys_fsconfig +432 n64 fsmount sys_fsmount +433 n64 fspick sys_fspick +434 n64 pidfd_open sys_pidfd_open +435 n64 clone3 __sys_clone3 +436 n64 close_range sys_close_range +437 n64 openat2 sys_openat2 +438 n64 pidfd_getfd sys_pidfd_getfd +439 n64 faccessat2 sys_faccessat2 +440 n64 process_madvise sys_process_madvise +441 n64 epoll_pwait2 sys_epoll_pwait2 diff --git a/tools/perf/arch/mips/include/dwarf-regs-table.h b/tools/perf/arch/mips/include/dwarf-regs-table.h new file mode 100644 index 000000000000..5badbcd3c5ec --- /dev/null +++ b/tools/perf/arch/mips/include/dwarf-regs-table.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dwarf-regs-table.h : Mapping of DWARF debug register numbers into + * register names. + * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifdef DEFINE_DWARF_REGSTR_TABLE +#undef REG_DWARFNUM_NAME +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "$" #reg +static const char * const mips_regstr_tbl[] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "%29", + "$30", "$31", + REG_DWARFNUM_NAME(hi, 64), + REG_DWARFNUM_NAME(lo, 65), +}; +#endif diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h new file mode 100644 index 000000000000..ee73b36a14d1 --- /dev/null +++ b/tools/perf/arch/mips/include/perf_regs.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include <linux/types.h> +#include <asm/perf_regs.h> + +#define PERF_REGS_MAX PERF_REG_MIPS_MAX +#define PERF_REG_IP PERF_REG_MIPS_PC +#define PERF_REG_SP PERF_REG_MIPS_R29 + +#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) + +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + return "$8"; + case PERF_REG_MIPS_R9: + return "$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/mips/util/Build b/tools/perf/arch/mips/util/Build new file mode 100644 index 000000000000..51c8900a9a10 --- /dev/null +++ b/tools/perf/arch/mips/util/Build @@ -0,0 +1,3 @@ +perf-y += perf_regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/mips/util/dwarf-regs.c b/tools/perf/arch/mips/util/dwarf-regs.c new file mode 100644 index 000000000000..25c13a91c2a7 --- /dev/null +++ b/tools/perf/arch/mips/util/dwarf-regs.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include <stdio.h> +#include <dwarf-regs.h> + +static const char *mips_gpr_names[32] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", + "$30", "$31" +}; + +const char *get_arch_regstr(unsigned int n) +{ + if (n < 32) + return mips_gpr_names[n]; + if (n == 64) + return "hi"; + if (n == 65) + return "lo"; + return NULL; +} diff --git a/tools/perf/arch/mips/util/perf_regs.c b/tools/perf/arch/mips/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/mips/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/mips/util/unwind-libunwind.c b/tools/perf/arch/mips/util/unwind-libunwind.c new file mode 100644 index 000000000000..0d8c99c29da6 --- /dev/null +++ b/tools/perf/arch/mips/util/unwind-libunwind.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <libunwind.h> +#include "perf_regs.h" +#include "../../util/unwind.h" +#include "util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_MIPS_R1 ... UNW_MIPS_R25: + return regnum - UNW_MIPS_R1 + PERF_REG_MIPS_R1; + case UNW_MIPS_R28 ... UNW_MIPS_R31: + return regnum - UNW_MIPS_R28 + PERF_REG_MIPS_R28; + case UNW_MIPS_PC: + return PERF_REG_MIPS_PC; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } +} diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index b7945e5a543b..8a79c4126e5b 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -4,6 +4,8 @@ perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o perf-y += sym-handling.o +perf-y += evsel.o +perf-y += event.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c new file mode 100644 index 000000000000..3bf441257466 --- /dev/null +++ b/tools/perf/arch/powerpc/util/event.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" + +void arch_perf_parse_sample_weight(struct perf_sample *data, + const __u64 *array, u64 type) +{ + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT) + data->weight = weight.full; + else { + data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->p_stage_cyc = weight.var3_w; + } +} + +void arch_perf_synthesize_sample_weight(const struct perf_sample *data, + __u64 *array, u64 type) +{ + *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { + *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + } +} + +const char *arch_perf_header_entry(const char *se_header) +{ + if (!strcmp(se_header, "Local INSTR Latency")) + return "Finish Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Dispatch Cyc"; + return se_header; +} + +int arch_support_sort_key(const char *sort_key) +{ + if (!strcmp(sort_key, "p_stage_cyc")) + return 1; + return 0; +} diff --git a/tools/perf/arch/powerpc/util/evsel.c b/tools/perf/arch/powerpc/util/evsel.c new file mode 100644 index 000000000000..2f733cdc8dbb --- /dev/null +++ b/tools/perf/arch/powerpc/util/evsel.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include "util/evsel.h" + +void arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); +} diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index eed9e5a42935..16510686c138 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -176,7 +176,7 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) } /* - * Incase of powerpc architecture, pmu registers are programmable + * In case of powerpc architecture, pmu registers are programmable * by guest kernel. So monitoring guest via host may not provide * valid samples with default 'cycles' event. It is better to use * 'trace_imc/trace_cycles' event for guest profiling, since it diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h index 5788eb1f1fe3..2baeb1c1ae85 100644 --- a/tools/perf/arch/powerpc/util/utils_header.h +++ b/tools/perf/arch/powerpc/util/utils_header.h @@ -10,6 +10,6 @@ #define SPRN_PVR 0x11F /* Processor Version Register */ #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ -#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */ #endif /* __PERF_UTIL_HEADER_H */ diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c index adcacf1b6609..dffcf9b52153 100644 --- a/tools/perf/arch/x86/tests/bp-modify.c +++ b/tools/perf/arch/x86/tests/bp-modify.c @@ -73,7 +73,7 @@ static int bp_modify1(void) /* * The parent does following steps: * - creates a new breakpoint (id 0) for bp_2 function - * - changes that breakponit to bp_1 function + * - changes that breakpoint to bp_1 function * - waits for the breakpoint to hit and checks * it has proper rip of bp_1 function * - detaches the child diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index fca81b39b09f..207c56805c55 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -165,7 +165,7 @@ static int sdt_init_op_regex(void) /* * Max x86 register name length is 5(ex: %r15d). So, 6th char * should always contain NULL. This helps to find register name - * length using strlen, insted of maintaing one more variable. + * length using strlen, instead of maintaining one more variable. */ #define SDT_REG_NAME_SIZE 6 @@ -207,7 +207,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) * and displacement 0 (Both sign and displacement 0 are * optional so it may be empty). Use one more character * to hold last NULL so that strlen can be used to find - * prefix length, instead of maintaing one more variable. + * prefix length, instead of maintaining one more variable. */ char prefix[3] = {0}; diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 0a0ff1247c83..79d13dbc0a47 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -17,7 +17,7 @@ * While the second model, enabled via --multiq option, uses multiple * queueing (which refers to one epoll instance per worker). For example, * short lived tcp connections in a high throughput httpd server will - * ditribute the accept()'ing connections across CPUs. In this case each + * distribute the accept()'ing connections across CPUs. In this case each * worker does a limited amount of processing. * * [queue A] ---> [worker] @@ -198,7 +198,7 @@ static void *workerfn(void *arg) do { /* - * Block undefinitely waiting for the IN event. + * Block indefinitely waiting for the IN event. * In order to stress the epoll_wait(2) syscall, * call it event per event, instead of a larger * batch (max)limit. diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 280227e3ffd7..55d373b75791 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -372,7 +372,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) len += synthesize_flush(data); } - /* tihs makes the child to finish */ + /* this makes the child to finish */ close(data->input_pipe[1]); wait4(data->pid, &status, 0, &rusage); diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 20b87e29c96f..f2640179ada9 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -42,7 +42,7 @@ #endif /* - * Regular printout to the terminal, supressed if -q is specified: + * Regular printout to the terminal, suppressed if -q is specified: */ #define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a23ba6bb99b6..0f3a196e5d6e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -239,7 +239,7 @@ static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample, } /* - * XXX filtered samples can still have branch entires pointing into our + * XXX filtered samples can still have branch entries pointing into our * symbol and are missed. */ process_branch_stack(sample->branch_stack, al, sample); diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 7c4a9d424a64..61929f63a047 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -6,7 +6,6 @@ #include <linux/zalloc.h> #include <linux/string.h> #include <linux/limits.h> -#include <linux/string.h> #include <string.h> #include <sys/file.h> #include <signal.h> @@ -24,8 +23,6 @@ #include <sys/signalfd.h> #include <sys/wait.h> #include <poll.h> -#include <sys/stat.h> -#include <time.h> #include "builtin.h" #include "perf.h" #include "debug.h" diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 878e04b1fab7..f52b3a799e76 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1796,7 +1796,7 @@ static int ui_init(void) data__for_each_file(i, d) { /* - * Baseline or compute realted columns: + * Baseline or compute related columns: * * PERF_HPP_DIFF__BASELINE * PERF_HPP_DIFF__DELTA diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a2f1e53f37a7..01326e370009 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -49,7 +49,7 @@ struct lock_stat { /* * FIXME: evsel__intval() returns u64, - * so address of lockdep_map should be dealed as 64bit. + * so address of lockdep_map should be treated as 64bit. * Is there more better solution? */ void *addr; /* address of lockdep_map, used as ID */ diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2a845d6cac09..0d65c98794a8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -84,6 +84,7 @@ struct report { bool nonany_branch_mode; bool group_set; bool stitch_lbr; + bool disable_order; int max_stack; struct perf_read_values show_threads_values; struct annotation_options annotation_opts; @@ -1296,6 +1297,8 @@ int cmd_report(int argc, const char **argv) OPTS_EVSWITCH(&report.evswitch), OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode, "Sort all blocks by 'Sampled Cycles%'"), + OPT_BOOLEAN(0, "disable-order", &report.disable_order, + "Disable raw trace ordering"), OPT_END() }; struct perf_data data = { @@ -1329,7 +1332,7 @@ int cmd_report(int argc, const char **argv) if (report.mmaps_mode) report.tasks_mode = true; - if (dump_trace) + if (dump_trace && report.disable_order) report.tool.ordered_events = false; if (quiet) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 69c769b04a61..954ce2f594e9 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1712,7 +1712,7 @@ static int perf_sched__process_fork_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - /* run the fork event through the perf machineruy */ + /* run the fork event through the perf machinery */ perf_event__process_fork(tool, event, sample, machine); /* and then run additional processing needed for this command */ diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5915f19cee55..1280cbfad4db 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -314,8 +314,7 @@ static inline struct evsel_script *evsel_script(struct evsel *evsel) return (struct evsel_script *)evsel->priv; } -static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, - struct perf_data *data) +static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data) { struct evsel_script *es = zalloc(sizeof(*es)); @@ -335,7 +334,7 @@ out_free: return NULL; } -static void perf_evsel_script__delete(struct evsel_script *es) +static void evsel_script__delete(struct evsel_script *es) { zfree(&es->filename); fclose(es->fp); @@ -343,7 +342,7 @@ static void perf_evsel_script__delete(struct evsel_script *es) free(es); } -static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp) +static int evsel_script__fprintf(struct evsel_script *es, FILE *fp) { struct stat st; @@ -2219,8 +2218,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (!evsel->priv) { if (scr->per_event_dump) { - evsel->priv = perf_evsel_script__new(evsel, - scr->session->data); + evsel->priv = evsel_script__new(evsel, scr->session->data); } else { es = zalloc(sizeof(*es)); if (!es) @@ -2475,7 +2473,7 @@ static void perf_script__fclose_per_event_dump(struct perf_script *script) evlist__for_each_entry(evlist, evsel) { if (!evsel->priv) break; - perf_evsel_script__delete(evsel->priv); + evsel_script__delete(evsel->priv); evsel->priv = NULL; } } @@ -2488,14 +2486,14 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) /* * Already setup? I.e. we may be called twice in cases like * Intel PT, one for the intel_pt// and dummy events, then - * for the evsels syntheized from the auxtrace info. + * for the evsels synthesized from the auxtrace info. * * Ses perf_script__process_auxtrace_info. */ if (evsel->priv != NULL) continue; - evsel->priv = perf_evsel_script__new(evsel, script->session->data); + evsel->priv = evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; } @@ -2530,8 +2528,8 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script) evlist__for_each_entry(script->session->evlist, evsel) { struct evsel_script *es = evsel->priv; - perf_evsel_script__fprintf(es, stdout); - perf_evsel_script__delete(es); + evsel_script__fprintf(es, stdout); + evsel_script__delete(es); evsel->priv = NULL; } } @@ -3085,7 +3083,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused, * * Fixme: All existing "xxx-record" are all in good formats "-e event ", * which is covered well now. And new parsing code should be added to - * cover the future complexing formats like event groups etc. + * cover the future complex formats like event groups etc. */ static int check_ev_match(char *dir_name, char *scriptname, struct perf_session *session) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2e2e4a8345ea..2a2c15cac80a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -792,6 +792,12 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } evlist__for_each_cpu (evsel_list, i, cpu) { + /* + * bperf calls evsel__open_per_cpu() in bperf__load(), so + * no need to call it again here. + */ + if (target.use_bpf) + break; affinity__set(&affinity, cpu); evlist__for_each_entry(evsel_list, counter) { @@ -925,15 +931,15 @@ try_again_reset: /* * Enable counters and exec the command: */ - t0 = rdclock(); - clock_gettime(CLOCK_MONOTONIC, &ref_time); - if (forks) { evlist__start_workload(evsel_list); err = enable_counters(); if (err) return -1; + t0 = rdclock(); + clock_gettime(CLOCK_MONOTONIC, &ref_time); + if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) status = dispatch_events(forks, timeout, interval, ×); if (child_pid != -1) { @@ -954,6 +960,10 @@ try_again_reset: err = enable_counters(); if (err) return -1; + + t0 = rdclock(); + clock_gettime(CLOCK_MONOTONIC, &ref_time); + status = dispatch_events(forks, timeout, interval, ×); } @@ -1083,6 +1093,11 @@ void perf_stat__set_big_num(int set) stat_config.big_num = (set != 0); } +void perf_stat__set_no_csv_summary(int set) +{ + stat_config.no_csv_summary = (set != 0); +} + static int stat__set_big_num(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset) { @@ -1146,6 +1161,10 @@ static struct option stat_options[] = { #ifdef HAVE_BPF_SKEL OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", "stat events on existing bpf program id"), + OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf, + "use bpf program to count events"), + OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path", + "path to perf_event_attr map"), #endif OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"), @@ -1235,6 +1254,8 @@ static struct option stat_options[] = { "threads of same physical core"), OPT_BOOLEAN(0, "summary", &stat_config.summary, "print summary for interval mode"), + OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, + "don't print 'summary' for CSV summary output"), OPT_BOOLEAN(0, "quiet", &stat_config.quiet, "don't print output (useful with record)"), #ifdef HAVE_LIBPFM @@ -1705,7 +1726,7 @@ static int add_default_attributes(void) bzero(&errinfo, sizeof(errinfo)); if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics - * support has been added to the json files, all archictures + * support has been added to the json files, all architectures * will use this approach. To determine transaction support * on an architecture test for such a metric name. */ @@ -2459,7 +2480,7 @@ int cmd_stat(int argc, const char **argv) /* * We synthesize the kernel mmap record just so that older tools * don't emit warnings about not being able to resolve symbols - * due to /proc/sys/kernel/kptr_restrict settings and instear provide + * due to /proc/sys/kernel/kptr_restrict settings and instead provide * a saner message about no samples being in the perf.data file. * * This also serves to suppress a warning about f_header.data.size == 0 diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3673c04d16b6..173ace43f845 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1607,7 +1607,7 @@ int cmd_top(int argc, const char **argv) if (status) { /* * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise - * warn the user explicitely. + * warn the user explicitly. */ eprintf(status == ENOSYS ? 1 : 0, verbose, "Couldn't read the cpuid for this machine: %s\n", diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index dded93a2bc89..39eada935d4e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -146,6 +146,7 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl +check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl for i in $BEAUTY_FILES; do beauty_check $i -B diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index b80437971d80..a262dcd020f4 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -262,7 +262,7 @@ int sys_enter(struct syscall_enter_args *args) /* * Jump to syscall specific augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unagmented tracepoint payload. + * unaugmented tracepoint payload. */ bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); @@ -282,7 +282,7 @@ int sys_exit(struct syscall_exit_args *args) /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unagmented tracepoint payload. + * unaugmented tracepoint payload. */ bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); /* diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 88108598d6e9..526dcaf9f079 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -390,7 +390,7 @@ jvmti_write_code(void *agent, char const *sym, rec.p.total_size += size; /* - * If JVM is multi-threaded, nultiple concurrent calls to agent + * If JVM is multi-threaded, multiple concurrent calls to agent * may be possible, so protect file writes */ flockfile(fp); @@ -457,7 +457,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, rec.p.total_size = size; /* - * If JVM is multi-threaded, nultiple concurrent calls to agent + * If JVM is multi-threaded, multiple concurrent calls to agent * may be possible, so protect file writes */ flockfile(fp); diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json index 75376c7cc072..913fb200ea52 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json @@ -210,12 +210,24 @@ "BriefDescription": "Attributable Level 2 data TLB refill" }, { + "PublicDescription": "Attributable Level 2 instruction TLB refill.", + "EventCode": "0x2E", + "EventName": "L2I_TLB_REFILL", + "BriefDescription": "Attributable Level 2 instruction TLB refill." + }, + { "PublicDescription": "Attributable Level 2 data or unified TLB access", "EventCode": "0x2F", "EventName": "L2D_TLB", "BriefDescription": "Attributable Level 2 data or unified TLB access" }, { + "PublicDescription": "Attributable Level 2 instruction TLB access.", + "EventCode": "0x30", + "EventName": "L2I_TLB", + "BriefDescription": "Attributable Level 2 instruction TLB access." + }, + { "PublicDescription": "Access to another socket in a multi-socket system", "EventCode": "0x31", "EventName": "REMOTE_ACCESS", @@ -244,5 +256,221 @@ "EventCode": "0x37", "EventName": "LL_CACHE_MISS_RD", "BriefDescription": "Last level cache miss, read" + }, + { + "PublicDescription": "SIMD Instruction architecturally executed.", + "EventCode": "0x8000", + "EventName": "SIMD_INST_RETIRED", + "BriefDescription": "SIMD Instruction architecturally executed." + }, + { + "PublicDescription": "Instruction architecturally executed, SVE.", + "EventCode": "0x8002", + "EventName": "SVE_INST_RETIRED", + "BriefDescription": "Instruction architecturally executed, SVE." + }, + { + "PublicDescription": "Microarchitectural operation, Operations speculatively executed.", + "EventCode": "0x8008", + "EventName": "UOP_SPEC", + "BriefDescription": "Microarchitectural operation, Operations speculatively executed." + }, + { + "PublicDescription": "SVE Math accelerator Operations speculatively executed.", + "EventCode": "0x800E", + "EventName": "SVE_MATH_SPEC", + "BriefDescription": "SVE Math accelerator Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point Operations speculatively executed.", + "EventCode": "0x8010", + "EventName": "FP_SPEC", + "BriefDescription": "Floating-point Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point FMA Operations speculatively executed.", + "EventCode": "0x8028", + "EventName": "FP_FMA_SPEC", + "BriefDescription": "Floating-point FMA Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point reciprocal estimate Operations speculatively executed.", + "EventCode": "0x8034", + "EventName": "FP_RECPE_SPEC", + "BriefDescription": "Floating-point reciprocal estimate Operations speculatively executed." + }, + { + "PublicDescription": "floating-point convert Operations speculatively executed.", + "EventCode": "0x8038", + "EventName": "FP_CVT_SPEC", + "BriefDescription": "floating-point convert Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE integer Operations speculatively executed.", + "EventCode": "0x8043", + "EventName": "ASE_SVE_INT_SPEC", + "BriefDescription": "Advanced SIMD and SVE integer Operations speculatively executed." + }, + { + "PublicDescription": "SVE predicated Operations speculatively executed.", + "EventCode": "0x8074", + "EventName": "SVE_PRED_SPEC", + "BriefDescription": "SVE predicated Operations speculatively executed." + }, + { + "PublicDescription": "SVE MOVPRFX Operations speculatively executed.", + "EventCode": "0x807C", + "EventName": "SVE_MOVPRFX_SPEC", + "BriefDescription": "SVE MOVPRFX Operations speculatively executed." + }, + { + "PublicDescription": "SVE MOVPRFX unfused Operations speculatively executed.", + "EventCode": "0x807F", + "EventName": "SVE_MOVPRFX_U_SPEC", + "BriefDescription": "SVE MOVPRFX unfused Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE load Operations speculatively executed.", + "EventCode": "0x8085", + "EventName": "ASE_SVE_LD_SPEC", + "BriefDescription": "Advanced SIMD and SVE load Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE store Operations speculatively executed.", + "EventCode": "0x8086", + "EventName": "ASE_SVE_ST_SPEC", + "BriefDescription": "Advanced SIMD and SVE store Operations speculatively executed." + }, + { + "PublicDescription": "Prefetch Operations speculatively executed.", + "EventCode": "0x8087", + "EventName": "PRF_SPEC", + "BriefDescription": "Prefetch Operations speculatively executed." + }, + { + "PublicDescription": "General-purpose register load Operations speculatively executed.", + "EventCode": "0x8089", + "EventName": "BASE_LD_REG_SPEC", + "BriefDescription": "General-purpose register load Operations speculatively executed." + }, + { + "PublicDescription": "General-purpose register store Operations speculatively executed.", + "EventCode": "0x808A", + "EventName": "BASE_ST_REG_SPEC", + "BriefDescription": "General-purpose register store Operations speculatively executed." + }, + { + "PublicDescription": "SVE unpredicated load register Operations speculatively executed.", + "EventCode": "0x8091", + "EventName": "SVE_LDR_REG_SPEC", + "BriefDescription": "SVE unpredicated load register Operations speculatively executed." + }, + { + "PublicDescription": "SVE unpredicated store register Operations speculatively executed.", + "EventCode": "0x8092", + "EventName": "SVE_STR_REG_SPEC", + "BriefDescription": "SVE unpredicated store register Operations speculatively executed." + }, + { + "PublicDescription": "SVE load predicate register Operations speculatively executed.", + "EventCode": "0x8095", + "EventName": "SVE_LDR_PREG_SPEC", + "BriefDescription": "SVE load predicate register Operations speculatively executed." + }, + { + "PublicDescription": "SVE store predicate register Operations speculatively executed.", + "EventCode": "0x8096", + "EventName": "SVE_STR_PREG_SPEC", + "BriefDescription": "SVE store predicate register Operations speculatively executed." + }, + { + "PublicDescription": "SVE contiguous prefetch element Operations speculatively executed.", + "EventCode": "0x809F", + "EventName": "SVE_PRF_CONTIG_SPEC", + "BriefDescription": "SVE contiguous prefetch element Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed.", + "EventCode": "0x80A5", + "EventName": "ASE_SVE_LD_MULTI_SPEC", + "BriefDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed.", + "EventCode": "0x80A6", + "EventName": "ASE_SVE_ST_MULTI_SPEC", + "BriefDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed." + }, + { + "PublicDescription": "SVE gather-load Operations speculatively executed.", + "EventCode": "0x80AD", + "EventName": "SVE_LD_GATHER_SPEC", + "BriefDescription": "SVE gather-load Operations speculatively executed." + }, + { + "PublicDescription": "SVE scatter-store Operations speculatively executed.", + "EventCode": "0x80AE", + "EventName": "SVE_ST_SCATTER_SPEC", + "BriefDescription": "SVE scatter-store Operations speculatively executed." + }, + { + "PublicDescription": "SVE gather-prefetch Operations speculatively executed.", + "EventCode": "0x80AF", + "EventName": "SVE_PRF_GATHER_SPEC", + "BriefDescription": "SVE gather-prefetch Operations speculatively executed." + }, + { + "PublicDescription": "SVE First-fault load Operations speculatively executed.", + "EventCode": "0x80BC", + "EventName": "SVE_LDFF_SPEC", + "BriefDescription": "SVE First-fault load Operations speculatively executed." + }, + { + "PublicDescription": "Scalable floating-point element Operations speculatively executed.", + "EventCode": "0x80C0", + "EventName": "FP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable floating-point element Operations speculatively executed.", + "EventCode": "0x80C1", + "EventName": "FP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable half-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C2", + "EventName": "FP_HP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable half-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable half-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C3", + "EventName": "FP_HP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable half-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable single-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C4", + "EventName": "FP_SP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable single-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable single-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C5", + "EventName": "FP_SP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable single-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable double-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C6", + "EventName": "FP_DP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable double-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable double-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C7", + "EventName": "FP_DP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed." } ] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json new file mode 100644 index 000000000000..b011af11bf94 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json new file mode 100644 index 000000000000..084e88d7df73 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json @@ -0,0 +1,62 @@ +[ + { + "PublicDescription": "This event counts read transactions from tofu controller to measured CMG.", + "EventCode": "0x314", + "EventName": "BUS_READ_TOTAL_TOFU", + "BriefDescription": "This event counts read transactions from tofu controller to measured CMG." + }, + { + "PublicDescription": "This event counts read transactions from PCI controller to measured CMG.", + "EventCode": "0x315", + "EventName": "BUS_READ_TOTAL_PCI", + "BriefDescription": "This event counts read transactions from PCI controller to measured CMG." + }, + { + "PublicDescription": "This event counts read transactions from measured CMG local memory to measured CMG.", + "EventCode": "0x316", + "EventName": "BUS_READ_TOTAL_MEM", + "BriefDescription": "This event counts read transactions from measured CMG local memory to measured CMG." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0.", + "EventCode": "0x318", + "EventName": "BUS_WRITE_TOTAL_CMG0", + "BriefDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1.", + "EventCode": "0x319", + "EventName": "BUS_WRITE_TOTAL_CMG1", + "BriefDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2.", + "EventCode": "0x31A", + "EventName": "BUS_WRITE_TOTAL_CMG2", + "BriefDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3.", + "EventCode": "0x31B", + "EventName": "BUS_WRITE_TOTAL_CMG3", + "BriefDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to tofu controller.", + "EventCode": "0x31C", + "EventName": "BUS_WRITE_TOTAL_TOFU", + "BriefDescription": "This event counts write transactions from measured CMG to tofu controller." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to PCI controller.", + "EventCode": "0x31D", + "EventName": "BUS_WRITE_TOTAL_PCI", + "BriefDescription": "This event counts write transactions from measured CMG to PCI controller." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to measured CMG local memory.", + "EventCode": "0x31E", + "EventName": "BUS_WRITE_TOTAL_MEM", + "BriefDescription": "This event counts write transactions from measured CMG to measured CMG local memory." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json new file mode 100644 index 000000000000..2e341a951a10 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json @@ -0,0 +1,128 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.", + "EventCode": "0x49", + "EventName": "L1D_CACHE_REFILL_PRF", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.", + "EventCode": "0x59", + "EventName": "L2D_CACHE_REFILL_PRF", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by demand access.", + "EventCode": "0x200", + "EventName": "L1D_CACHE_REFILL_DM", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand access." + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch.", + "EventCode": "0x202", + "EventName": "L1D_CACHE_REFILL_HWPRF", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch." + }, + { + "PublicDescription": "This event counts outstanding L1D cache miss requests per cycle.", + "EventCode": "0x208", + "EventName": "L1_MISS_WAIT", + "BriefDescription": "This event counts outstanding L1D cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts outstanding L1I cache miss requests per cycle.", + "EventCode": "0x209", + "EventName": "L1I_MISS_WAIT", + "BriefDescription": "This event counts outstanding L1I cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by demand access.", + "EventCode": "0x300", + "EventName": "L2D_CACHE_REFILL_DM", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand access." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch.", + "EventCode": "0x302", + "EventName": "L2D_CACHE_REFILL_HWPRF", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch." + }, + { + "PublicDescription": "This event counts outstanding L2 cache miss requests per cycle.", + "EventCode": "0x308", + "EventName": "L2_MISS_WAIT", + "BriefDescription": "This event counts outstanding L2 cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts the number of times of L2 cache miss.", + "EventCode": "0x309", + "EventName": "L2_MISS_COUNT", + "BriefDescription": "This event counts the number of times of L2 cache miss." + }, + { + "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", + "EventCode": "0x325", + "EventName": "L2D_SWAP_DM", + "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", + "EventCode": "0x326", + "EventName": "L2D_CACHE_MIBMCH_PRF", + "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." + }, + { + "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", + "EventCode": "0x396", + "EventName": "L2D_CACHE_SWAP_LOCAL", + "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts energy consumption per cycle of L2 cache.", + "EventCode": "0x3E0", + "EventName": "EA_L2", + "BriefDescription": "This event counts energy consumption per cycle of L2 cache." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json new file mode 100644 index 000000000000..b16484628290 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json new file mode 100644 index 000000000000..348749c154c0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json @@ -0,0 +1,29 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json new file mode 100644 index 000000000000..6d258b1080cf --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json @@ -0,0 +1,131 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "PublicDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.", + "EventCode": "0x9F", + "EventName": "DCZVA_SPEC", + "BriefDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction." + }, + { + "PublicDescription": "This event counts architecturally executed floating-point move operations.", + "EventCode": "0x105", + "EventName": "FP_MV_SPEC", + "BriefDescription": "This event counts architecturally executed floating-point move operations." + }, + { + "PublicDescription": "This event counts architecturally executed operations that using predicate register.", + "EventCode": "0x108", + "EventName": "PRD_SPEC", + "BriefDescription": "This event counts architecturally executed operations that using predicate register." + }, + { + "PublicDescription": "This event counts architecturally executed inter-element manipulation operations.", + "EventCode": "0x109", + "EventName": "IEL_SPEC", + "BriefDescription": "This event counts architecturally executed inter-element manipulation operations." + }, + { + "PublicDescription": "This event counts architecturally executed inter-register manipulation operations.", + "EventCode": "0x10A", + "EventName": "IREG_SPEC", + "BriefDescription": "This event counts architecturally executed inter-register manipulation operations." + }, + { + "PublicDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers.", + "EventCode": "0x112", + "EventName": "FP_LD_SPEC", + "BriefDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers." + }, + { + "PublicDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers.", + "EventCode": "0x113", + "EventName": "FP_ST_SPEC", + "BriefDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers." + }, + { + "PublicDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations.", + "EventCode": "0x11A", + "EventName": "BC_LD_SPEC", + "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations." + }, + { + "PublicDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.", + "EventCode": "0x121", + "EventName": "EFFECTIVE_INST_SPEC", + "BriefDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction." + }, + { + "PublicDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode.", + "EventCode": "0x123", + "EventName": "PRE_INDEX_SPEC", + "BriefDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode." + }, + { + "PublicDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode.", + "EventCode": "0x124", + "EventName": "POST_INDEX_SPEC", + "BriefDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json new file mode 100644 index 000000000000..c1f6479e92b4 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json @@ -0,0 +1,8 @@ +[ + { + "PublicDescription": "This event counts energy consumption per cycle of CMG local memory.", + "EventCode": "0x3E8", + "EventName": "EA_MEMORY", + "BriefDescription": "This event counts energy consumption per cycle of CMG local memory." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json new file mode 100644 index 000000000000..10c823ac26cc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json @@ -0,0 +1,188 @@ +[ + { + "PublicDescription": "This event counts the occurrence count of the micro-operation split.", + "EventCode": "0x139", + "EventName": "UOP_SPLIT", + "BriefDescription": "This event counts the occurrence count of the micro-operation split." + }, + { + "PublicDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access.", + "EventCode": "0x180", + "EventName": "LD_COMP_WAIT_L2_MISS", + "BriefDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access.", + "EventCode": "0x181", + "EventName": "LD_COMP_WAIT_L2_MISS_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access.", + "EventCode": "0x182", + "EventName": "LD_COMP_WAIT_L1_MISS", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access.", + "EventCode": "0x183", + "EventName": "LD_COMP_WAIT_L1_MISS_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access.", + "EventCode": "0x184", + "EventName": "LD_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access.", + "EventCode": "0x185", + "EventName": "LD_COMP_WAIT_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port.", + "EventCode": "0x186", + "EventName": "LD_COMP_WAIT_PFP_BUSY", + "BriefDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port." + }, + { + "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.", + "EventCode": "0x187", + "EventName": "LD_COMP_WAIT_PFP_BUSY_EX", + "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation." + }, + { + "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.", + "EventCode": "0x188", + "EventName": "LD_COMP_WAIT_PFP_BUSY_SWPF", + "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction.", + "EventCode": "0x189", + "EventName": "EU_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction.", + "EventCode": "0x18A", + "EventName": "FL_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction.", + "EventCode": "0x18B", + "EventName": "BR_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty.", + "EventCode": "0x18C", + "EventName": "ROB_EMPTY", + "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full.", + "EventCode": "0x18D", + "EventName": "ROB_EMPTY_STQ_BUSY", + "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full." + }, + { + "PublicDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction.", + "EventCode": "0x18E", + "EventName": "WFE_WFI_CYCLE", + "BriefDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only.", + "EventCode": "0x190", + "EventName": "_0INST_COMMIT", + "BriefDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only." + }, + { + "PublicDescription": "This event counts every cycle that one instruction is committed.", + "EventCode": "0x191", + "EventName": "_1INST_COMMIT", + "BriefDescription": "This event counts every cycle that one instruction is committed." + }, + { + "PublicDescription": "This event counts every cycle that two instructions are committed.", + "EventCode": "0x192", + "EventName": "_2INST_COMMIT", + "BriefDescription": "This event counts every cycle that two instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that three instructions are committed.", + "EventCode": "0x193", + "EventName": "_3INST_COMMIT", + "BriefDescription": "This event counts every cycle that three instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that four instructions are committed.", + "EventCode": "0x194", + "EventName": "_4INST_COMMIT", + "BriefDescription": "This event counts every cycle that four instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that only any micro-operations are committed.", + "EventCode": "0x198", + "EventName": "UOP_ONLY_COMMIT", + "BriefDescription": "This event counts every cycle that only any micro-operations are committed." + }, + { + "PublicDescription": "This event counts every cycle that only the MOVPRFX instruction is committed.", + "EventCode": "0x199", + "EventName": "SINGLE_MOVPRFX_COMMIT", + "BriefDescription": "This event counts every cycle that only the MOVPRFX instruction is committed." + }, + { + "PublicDescription": "This event counts energy consumption per cycle of core.", + "EventCode": "0x1E0", + "EventName": "EA_CORE", + "BriefDescription": "This event counts energy consumption per cycle of core." + }, + { + "PublicDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x230", + "EventName": "L1HWPF_STREAM_PF", + "BriefDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x231", + "EventName": "L1HWPF_INJ_ALLOC_PF", + "BriefDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x232", + "EventName": "L1HWPF_INJ_NOALLOC_PF", + "BriefDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher.", + "EventCode": "0x233", + "EventName": "L2HWPF_STREAM_PF", + "BriefDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher." + }, + { + "PublicDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", + "EventCode": "0x234", + "EventName": "L2HWPF_INJ_ALLOC_PF", + "BriefDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", + "EventCode": "0x235", + "EventName": "L2HWPF_INJ_NOALLOC_PF", + "BriefDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts prefetch requests to L2 cache generated by the other causes.", + "EventCode": "0x236", + "EventName": "L2HWPF_OTHER", + "BriefDescription": "This event counts prefetch requests to L2 cache generated by the other causes." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json new file mode 100644 index 000000000000..dd7c97a9972b --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json @@ -0,0 +1,194 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "PublicDescription": "This event counts valid cycles of EAGA pipeline.", + "EventCode": "0x1A0", + "EventName": "EAGA_VAL", + "BriefDescription": "This event counts valid cycles of EAGA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EAGB pipeline.", + "EventCode": "0x1A1", + "EventName": "EAGB_VAL", + "BriefDescription": "This event counts valid cycles of EAGB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EXA pipeline.", + "EventCode": "0x1A2", + "EventName": "EXA_VAL", + "BriefDescription": "This event counts valid cycles of EXA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EXB pipeline.", + "EventCode": "0x1A3", + "EventName": "EXB_VAL", + "BriefDescription": "This event counts valid cycles of EXB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of FLA pipeline.", + "EventCode": "0x1A4", + "EventName": "FLA_VAL", + "BriefDescription": "This event counts valid cycles of FLA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of FLB pipeline.", + "EventCode": "0x1A5", + "EventName": "FLB_VAL", + "BriefDescription": "This event counts valid cycles of FLB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of PRX pipeline.", + "EventCode": "0x1A6", + "EventName": "PRX_VAL", + "BriefDescription": "This event counts valid cycles of PRX pipeline." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x1B4", + "EventName": "FLA_VAL_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x1B5", + "EventName": "FLB_VAL_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts valid cycles of L1D cache pipeline#0.", + "EventCode": "0x240", + "EventName": "L1_PIPE0_VAL", + "BriefDescription": "This event counts valid cycles of L1D cache pipeline#0." + }, + { + "PublicDescription": "This event counts valid cycles of L1D cache pipeline#1.", + "EventCode": "0x241", + "EventName": "L1_PIPE1_VAL", + "BriefDescription": "This event counts valid cycles of L1D cache pipeline#1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1.", + "EventCode": "0x250", + "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_SCE", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1.", + "EventCode": "0x251", + "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_PFE", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1.", + "EventCode": "0x252", + "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_SCE", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1.", + "EventCode": "0x253", + "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_PFE", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts completed requests in L1D cache pipeline#0.", + "EventCode": "0x260", + "EventName": "L1_PIPE0_COMP", + "BriefDescription": "This event counts completed requests in L1D cache pipeline#0." + }, + { + "PublicDescription": "This event counts completed requests in L1D cache pipeline#1.", + "EventCode": "0x261", + "EventName": "L1_PIPE1_COMP", + "BriefDescription": "This event counts completed requests in L1D cache pipeline#1." + }, + { + "PublicDescription": "This event counts completed requests in L1I cache pipeline.", + "EventCode": "0x268", + "EventName": "L1I_PIPE_COMP", + "BriefDescription": "This event counts completed requests in L1I cache pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of L1I cache pipeline.", + "EventCode": "0x269", + "EventName": "L1I_PIPE_VAL", + "BriefDescription": "This event counts valid cycles of L1I cache pipeline." + }, + { + "PublicDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock.", + "EventCode": "0x274", + "EventName": "L1_PIPE_ABORT_STLD_INTLK", + "BriefDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0.", + "EventCode": "0x2A0", + "EventName": "L1_PIPE0_VAL_IU_NOT_SEC0", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0.", + "EventCode": "0x2A1", + "EventName": "L1_PIPE1_VAL_IU_NOT_SEC0", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined.", + "EventCode": "0x2B0", + "EventName": "L1_PIPE_COMP_GATHER_2FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined.", + "EventCode": "0x2B1", + "EventName": "L1_PIPE_COMP_GATHER_1FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0.", + "EventCode": "0x2B2", + "EventName": "L1_PIPE_COMP_GATHER_0FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0." + }, + { + "PublicDescription": "This event counts the number of flows of the scatter instructions.", + "EventCode": "0x2B3", + "EventName": "L1_PIPE_COMP_SCATTER_1FLOW", + "BriefDescription": "This event counts the number of flows of the scatter instructions." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x2B8", + "EventName": "L1_PIPE0_COMP_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x2B9", + "EventName": "L1_PIPE1_COMP_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts valid cycles of L2 cache pipeline.", + "EventCode": "0x330", + "EventName": "L2_PIPE_VAL", + "BriefDescription": "This event counts valid cycles of L2 cache pipeline." + }, + { + "PublicDescription": "This event counts completed requests in L2 cache pipeline.", + "EventCode": "0x350", + "EventName": "L2_PIPE_COMP_ALL", + "BriefDescription": "This event counts completed requests in L2 cache pipeline." + }, + { + "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", + "EventCode": "0x370", + "EventName": "L2_PIPE_COMP_PF_L2MIB_MCH", + "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json new file mode 100644 index 000000000000..dc1b95e42c32 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json @@ -0,0 +1,110 @@ +[ + { + "ArchStdEvent": "SIMD_INST_RETIRED" + }, + { + "ArchStdEvent": "SVE_INST_RETIRED" + }, + { + "ArchStdEvent": "UOP_SPEC" + }, + { + "ArchStdEvent": "SVE_MATH_SPEC" + }, + { + "ArchStdEvent": "FP_SPEC" + }, + { + "ArchStdEvent": "FP_FMA_SPEC" + }, + { + "ArchStdEvent": "FP_RECPE_SPEC" + }, + { + "ArchStdEvent": "FP_CVT_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_SPEC" + }, + { + "ArchStdEvent": "SVE_MOVPRFX_SPEC" + }, + { + "ArchStdEvent": "SVE_MOVPRFX_U_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_LD_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_ST_SPEC" + }, + { + "ArchStdEvent": "PRF_SPEC" + }, + { + "ArchStdEvent": "BASE_LD_REG_SPEC" + }, + { + "ArchStdEvent": "BASE_ST_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_LDR_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_STR_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_LDR_PREG_SPEC" + }, + { + "ArchStdEvent": "SVE_STR_PREG_SPEC" + }, + { + "ArchStdEvent": "SVE_PRF_CONTIG_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC" + }, + { + "ArchStdEvent": "SVE_LD_GATHER_SPEC" + }, + { + "ArchStdEvent": "SVE_ST_SCATTER_SPEC" + }, + { + "ArchStdEvent": "SVE_PRF_GATHER_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_SPEC" + }, + { + "ArchStdEvent": "FP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 0d609149b82a..c43591d831b8 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -20,5 +20,6 @@ 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core +0x00000000460f0010,v1,fujitsu/a64fx,core 0x00000000480fd010,v1,hisilicon/hip08,core 0x00000000500f0000,v1,ampere/emag,core diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json index fc4aa6c2ddc9..4e25525b7da6 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -885,37 +885,37 @@ "MetricName": "flush_rate_percent" }, { - "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_11to14_slots_percent" }, { - "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_15to17_slots_percent" }, { - "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_18plus_slots_percent" }, { - "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_1to2_slots_percent" }, { - "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_3to6_slots_percent" }, { - "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_7to10_slots_percent" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index f8784c608479..db86ba36224d 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -1210,156 +1210,24 @@ "MetricName": "inst_from_rmem_percent" }, { - "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_co_m_rd_util" - }, - { - "BriefDescription": "L2 dcache invalidates per run inst (per core)", - "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_dc_inv_rate_percent" - }, - { "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)", "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100", "MetricGroup": "l2_stats", "MetricName": "l2_dem_ld_disp_percent" }, { - "BriefDescription": "L2 Icache invalidates per run inst (per core)", - "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ic_inv_rate_percent" - }, - { - "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", - "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_inst_miss_ratio_percent" - }, - { - "BriefDescription": "Average number of cycles between L2 Load hits", - "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_hit_frequency" - }, - { - "BriefDescription": "Average number of cycles between L2 Load misses", - "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_miss_frequency" - }, - { - "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", - "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_miss_ratio_percent" - }, - { - "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_rd_util" - }, - { - "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks", - "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ldmiss_wr_util" - }, - { - "BriefDescription": "L2 local pump prediction success", - "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_local_pred_correct_percent" - }, - { - "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs", - "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_mod_co_percent" - }, - { - "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_ld_disp_addr_fail_percent" - }, - { - "BriefDescription": "% of L2 Load RC dispatch attempts that failed", - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_ld_disp_fail_percent" - }, - { - "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", - "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_st_disp_addr_fail_percent" - }, - { - "BriefDescription": "% of L2 Store RC dispatch attempts that failed", - "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_st_disp_fail_percent" - }, - { - "BriefDescription": "L2 Cache Read Utilization (per core)", - "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)", - "MetricGroup": "l2_stats", - "MetricName": "l2_rd_util_percent" - }, - { - "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", - "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_shr_co_percent" - }, - { "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", "MetricGroup": "l2_stats", "MetricName": "l2_st_miss_ratio_percent" }, { - "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_st_rd_util" - }, - { "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks", "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100", "MetricGroup": "l2_stats", "MetricName": "l2_st_wr_util" }, { - "BriefDescription": "L2 Cache Write Utilization (per core)", - "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)", - "MetricGroup": "l2_stats", - "MetricName": "l2_wr_util_percent" - }, - { - "BriefDescription": "Average number of cycles between L3 Load hits", - "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2", - "MetricGroup": "l3_stats", - "MetricName": "l3_ld_hit_frequency" - }, - { - "BriefDescription": "Average number of cycles between L3 Load misses", - "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2", - "MetricGroup": "l3_stats", - "MetricName": "l3_ld_miss_frequency" - }, - { - "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle", - "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", - "MetricGroup": "l3_stats", - "MetricName": "l3_wi_usage" - }, - { "BriefDescription": "Average icache miss latency", "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ", "MetricGroup": "latency", @@ -1823,7 +1691,7 @@ "MetricName": "custom_secs" }, { - "BriefDescription": "Percentage Cycles atleast one instruction dispatched", + "BriefDescription": "Percentage Cycles at least one instruction dispatched", "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", "MetricName": "cycles_atleast_one_inst_dispatched_percent" }, diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index e1f3f5c8c550..33aa3c885eaf 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1149,7 +1149,7 @@ static int process_one_file(const char *fpath, const struct stat *sb, * and directory tree could result in build failure due to table * names not being found. * - * Atleast for now, be strict with processing JSON file names. + * At least for now, be strict with processing JSON file names. * i.e. if JSON file name cannot be mapped to C-style table name, * fail. */ diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index ea0c8b90a783..a0cfc7fe5908 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py @@ -356,7 +356,7 @@ def handle_irq_softirq_exit(event_info): return rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time, 'irq_list':irq_list, 'event_list':event_list} - # merge information realted to a NET_RX softirq + # merge information related to a NET_RX softirq receive_hunk_list.append(rec_data) def handle_napi_poll(event_info): diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index cc9fbcedb364..ef37353636d8 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -225,11 +225,11 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused * * The test case check following error conditions: * - we get stuck in signal handler because of debug - * exception being triggered receursively due to + * exception being triggered recursively due to * the wrong RF EFLAG management * * - we never trigger the sig_handler breakpoint due - * to the rong RF EFLAG management + * to the wrong RF EFLAG management * */ @@ -242,7 +242,7 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); /* - * Kick off the test by trigering 'fd1' + * Kick off the test by triggering 'fd1' * breakpoint. */ test_function(); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 2fdc7b2f996e..9866cddebf23 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -658,7 +658,7 @@ static int do_test_code_reading(bool try_kcore) /* * Both cpus and threads are now owned by evlist * and will be freed by following perf_evlist__set_maps - * call. Getting refference to keep them alive. + * call. Getting reference to keep them alive. */ perf_cpu_map__get(cpus); perf_thread_map__get(threads); diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c index a273ed5163d7..2fac7d762c0d 100644 --- a/tools/perf/tests/demangle-ocaml-test.c +++ b/tools/perf/tests/demangle-ocaml-test.c @@ -26,7 +26,7 @@ int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_u "Stdlib.bytes.++" }, }; - for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { buf = ocaml_demangle_sym(test_cases[i].mangled); if ((buf == NULL && test_cases[i].demangled != NULL) || (buf != NULL && test_cases[i].demangled == NULL) diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 3f2e1a581247..890cb1f5bf53 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -47,7 +47,7 @@ static struct sample fake_samples[] = { }; /* - * Will be casted to struct ip_callchain which has all 64 bit entries + * Will be cast to struct ip_callchain which has all 64 bit entries * of nr and ips[]. */ static u64 fake_callchains[][10] = { @@ -297,7 +297,7 @@ out: return err; } -/* callcain + NO children */ +/* callchain + NO children */ static int test2(struct evsel *evsel, struct machine *machine) { int err; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index a7f6661e6112..026c54743311 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -20,7 +20,7 @@ #if defined(__s390x__) /* Return true if kvm module is available and loaded. Test this - * and retun success when trace point kvm_s390_create_vm + * and return success when trace point kvm_s390_create_vm * exists. Otherwise this test always fails. */ static bool kvm_s390_create_vm_valid(void) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 6dc1db1626ad..4968c4106254 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -98,7 +98,7 @@ static u64 find_value(const char *name, struct value *values) if (!strcmp(name, v->event)) return v->val; v++; - }; + } return 0; } @@ -186,7 +186,7 @@ static int __compute_metric(const char *name, struct value *vals, *ratio2 = compute_single(&metric_events, evlist, &st, name2); out: - /* ... clenup. */ + /* ... cleanup. */ metricgroup__rblist_exit(&metric_events); runtime_stat__exit(&st); evlist__free_stats(evlist); diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index 416af614bbe0..f05670d1e39e 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -14,18 +14,56 @@ if ! [ -x "$(command -v cc)" ]; then exit 2 fi +# check what we need to test windows binaries +add_pe=1 +run_pe=1 +if ! perf version --build-options | grep -q 'libbfd: .* on '; then + echo "WARNING: perf not built with libbfd. PE binaries will not be tested." + add_pe=0 + run_pe=0 +fi +if ! which wine > /dev/null; then + echo "WARNING: wine not found. PE binaries will not be run." + run_pe=0 +fi + +# set up wine +if [ ${run_pe} -eq 1 ]; then + wineprefix=$(mktemp -d /tmp/perf.wineprefix.XXX) + export WINEPREFIX=${wineprefix} + # clear display variables to prevent wine from popping up dialogs + unset DISPLAY + unset WAYLAND_DISPLAY +fi + ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX) ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX) +ex_pe=$(dirname $0)/../pe-file.exe echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c - echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c - -echo "test binaries: ${ex_sha1} ${ex_md5}" +echo "test binaries: ${ex_sha1} ${ex_md5} ${ex_pe}" check() { - id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` - + case $1 in + *.exe) + # We don't have a tool that can pull a nicely formatted build-id out of + # a PE file, but we can extract the whole section with objcopy and + # format it ourselves. The .buildid section is a Debug Directory + # containing a CodeView entry: + # https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#debug-directory-image-only + # https://github.com/dotnet/runtime/blob/da94c022576a5c3bbc0e896f006565905eb137f9/docs/design/specs/PE-COFF.md + # The build-id starts at byte 33 and must be rearranged into a GUID. + id=`objcopy -O binary --only-section=.buildid $1 /dev/stdout | \ + cut -c 33-48 | hexdump -ve '/1 "%02x"' | \ + sed 's@^\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(.*\)0a$@\4\3\2\1\6\5\8\7\9@'` + ;; + *) + id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` + ;; + esac echo "build id: ${id}" link=${build_id_dir}/.build-id/${id:0:2}/${id:2} @@ -50,7 +88,7 @@ check() exit 1 fi - ${perf} buildid-cache -l | grep $id + ${perf} buildid-cache -l | grep ${id} if [ $? -ne 0 ]; then echo "failed: ${id} is not reported by \"perf buildid-cache -l\"" exit 1 @@ -79,16 +117,20 @@ test_record() { data=$(mktemp /tmp/perf.data.XXX) build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) + log=$(mktemp /tmp/perf.log.XXX) perf="perf --buildid-dir ${build_id_dir}" - ${perf} record --buildid-all -o ${data} ${1} + echo "running: perf record $@" + ${perf} record --buildid-all -o ${data} $@ &> ${log} if [ $? -ne 0 ]; then - echo "failed: record ${1}" + echo "failed: record $@" + echo "see log: ${log}" exit 1 fi - check ${1} + check ${@: -1} + rm -f ${log} rm -rf ${build_id_dir} rm -rf ${data} } @@ -96,12 +138,21 @@ test_record() # add binaries manual via perf buildid-cache -a test_add ${ex_sha1} test_add ${ex_md5} +if [ ${add_pe} -eq 1 ]; then + test_add ${ex_pe} +fi # add binaries via perf record post processing test_record ${ex_sha1} test_record ${ex_md5} +if [ ${run_pe} -eq 1 ]; then + test_record wine ${ex_pe} +fi # cleanup rm ${ex_sha1} ${ex_md5} +if [ ${run_pe} -eq 1 ]; then + rm -r ${wineprefix} +fi exit ${err} diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 58984380b211..45fc24af5b07 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -98,6 +98,23 @@ check_line_other() fi } +daemon_exit() +{ + local config=$1 + + local line=`perf daemon --config ${config} -x: | head -1` + local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` + + # Reset trap handler. + trap - SIGINT SIGTERM + + # stop daemon + perf daemon stop --config ${config} + + # ... and wait for the pid to go away + tail --pid=${pid} -f /dev/null +} + daemon_start() { local config=$1 @@ -105,29 +122,24 @@ daemon_start() perf daemon start --config ${config} + # Clean up daemon if interrupted. + trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM + # wait for the session to ping local state="FAIL" + local retries=0 while [ "${state}" != "OK" ]; do state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'` sleep 0.05 + retries=$((${retries} +1)) + if [ ${retries} -ge 600 ]; then + echo "FAILED: Timeout waiting for daemon to ping" + daemon_exit ${config} + exit 1 + fi done } -daemon_exit() -{ - local base=$1 - local config=$2 - - local line=`perf daemon --config ${config} -x: | head -1` - local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` - - # stop daemon - perf daemon stop --config ${config} - - # ... and wait for the pid to go away - tail --pid=${pid} -f /dev/null -} - test_list() { echo "test daemon list" @@ -171,7 +183,7 @@ EOF ${base}/session-time/ack "0" # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -288,7 +300,7 @@ EOF done # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -333,7 +345,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} # check that sessions are gone if [ -d "/proc/${pid_size}" ]; then @@ -374,7 +386,7 @@ EOF perf daemon signal --config ${config} # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} # count is 2 perf.data for signals and 1 for perf record finished count=`ls ${base}/session-test/ | grep perf.data | wc -l` @@ -420,7 +432,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -457,7 +469,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} diff --git a/tools/perf/tests/shell/stat+csv_summary.sh b/tools/perf/tests/shell/stat+csv_summary.sh new file mode 100755 index 000000000000..5571ff75eb42 --- /dev/null +++ b/tools/perf/tests/shell/stat+csv_summary.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# perf stat csv summary test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# +# 1.001364330 9224197 cycles 8012885033 100.00 +# summary 9224197 cycles 8012885033 100.00 +# +perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary 2>&1 | \ +grep -e summary | \ +while read summary num event run pct +do + if [ $summary != "summary" ]; then + exit 1 + fi +done + +# +# 1.001360298 9148534 cycles 8012853854 100.00 +#9148534 cycles 8012853854 100.00 +# +perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \ +grep -e summary | \ +while read num event run pct +do + exit 1 +done + +exit 0 diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh new file mode 100755 index 000000000000..22eb31e48ca7 --- /dev/null +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# perf stat --bpf-counters test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# check whether $2 is within +/- 10% of $1 +compare_number() +{ + first_num=$1 + second_num=$2 + + # upper bound is first_num * 110% + upper=$(( $first_num + $first_num / 10 )) + # lower bound is first_num * 90% + lower=$(( $first_num - $first_num / 10 )) + + if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then + echo "The difference between $first_num and $second_num are greater than 10%." + exit 1 + fi +} + +# skip if --bpf-counters is not supported +perf stat --bpf-counters true > /dev/null 2>&1 || exit 2 + +base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') +bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') + +compare_number $base_cycles $bpf_cycles +exit 0 diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 74748ed75b2c..050489807a47 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -80,7 +80,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * CPU 1 is on core_id 1 and physical_package_id 3 * * Core_id and physical_package_id are platform and architecture - * dependend and might have higher numbers than the CPU id. + * dependent and might have higher numbers than the CPU id. * This actually depends on the configuration. * * In this case process_cpu_topology() prints error message: diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 385894b4a8bb..b8fc5c53ba6f 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -85,7 +85,7 @@ struct mmsghdr { /* * POSIX 1003.1g - ancillary data object information - * Ancillary data consits of a sequence of pairs of + * Ancillary data consists of a sequence of pairs of * (cmsghdr, cmsg_data[]) */ diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 35b82caf8090..ad0a70f0edaf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -381,6 +381,25 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) return true; } +#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) + +static void annotate_browser__show_full_location(struct ui_browser *browser) +{ + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); + struct disasm_line *cursor = disasm_line(ab->selection); + struct annotation_line *al = &cursor->al; + + if (al->offset != -1) + ui_helpline__puts("Only available for source code lines."); + else if (al->fileloc == NULL) + ui_helpline__puts("No source file location."); + else { + char help_line[SYM_TITLE_MAX_SIZE]; + sprintf (help_line, "Source file location: %s", al->fileloc); + ui_helpline__puts(help_line); + } +} + static void ui_browser__init_asm_mode(struct ui_browser *browser) { struct annotation *notes = browser__annotation(browser); @@ -388,8 +407,6 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) browser->nr_entries = notes->nr_asm_entries; } -#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) - static int sym_title(struct symbol *sym, struct map *map, char *title, size_t sz, int percent_type) { @@ -398,7 +415,7 @@ static int sym_title(struct symbol *sym, struct map *map, char *title, } /* - * This can be called from external jumps, i.e. jumps from one functon + * This can be called from external jumps, i.e. jumps from one function * to another, like from the kernel's entry_SYSCALL_64 function to the * swapgs_restore_regs_and_return_to_usermode() function. * @@ -747,6 +764,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "c Show min/max cycle\n" "/ Search string\n" "k Toggle line numbers\n" + "l Show full source file location\n" "P Print to [symbol_name].annotation file.\n" "r Run available scripts\n" "p Toggle percent type [local/global]\n" @@ -760,6 +778,9 @@ static int annotate_browser__run(struct annotate_browser *browser, case 'k': notes->options->show_linenr = !notes->options->show_linenr; continue; + case 'l': + annotate_browser__show_full_location (&browser->b); + continue; case 'H': nd = browser->curr_hot; break; diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 3b9818ee9546..bcfd0a45953b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -117,7 +117,7 @@ static void hist_browser__update_rows(struct hist_browser *hb) browser->rows -= browser->extra_title_lines; /* * Verify if we were at the last line and that line isn't - * visibe because we now show the header line(s). + * visible because we now show the header line(s). */ index_row = browser->index - browser->top_idx; if (index_row >= browser->rows) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e60841b86d27..18eee25b4976 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1161,6 +1161,7 @@ struct annotate_args { s64 offset; char *line; int line_nr; + char *fileloc; }; static void annotation_line__init(struct annotation_line *al, @@ -1170,6 +1171,7 @@ static void annotation_line__init(struct annotation_line *al, al->offset = args->offset; al->line = strdup(args->line); al->line_nr = args->line_nr; + al->fileloc = args->fileloc; al->data_nr = nr; } @@ -1482,7 +1484,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start */ static int symbol__parse_objdump_line(struct symbol *sym, struct annotate_args *args, - char *parsed_line, int *line_nr) + char *parsed_line, int *line_nr, char **fileloc) { struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); @@ -1494,6 +1496,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); + *fileloc = strdup(parsed_line); return 0; } @@ -1513,6 +1516,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, args->offset = offset; args->line = parsed_line; args->line_nr = *line_nr; + args->fileloc = *fileloc; args->ms.sym = sym; dl = disasm_line__new(args); @@ -1807,6 +1811,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, args->offset = -1; args->line = strdup(srcline); args->line_nr = 0; + args->fileloc = NULL; args->ms.sym = sym; dl = disasm_line__new(args); if (dl) { @@ -1818,6 +1823,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, args->offset = pc; args->line = buf + prev_buf_size; args->line_nr = 0; + args->fileloc = NULL; args->ms.sym = sym; dl = disasm_line__new(args); if (dl) @@ -1852,6 +1858,7 @@ symbol__disassemble_bpf_image(struct symbol *sym, args->offset = -1; args->line = strdup("to be implemented"); args->line_nr = 0; + args->fileloc = NULL; dl = disasm_line__new(args); if (dl) annotation_line__add(&dl->al, ¬es->src->source); @@ -1933,6 +1940,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) bool delete_extract = false; bool decomp = false; int lineno = 0; + char *fileloc = NULL; int nline; char *line; size_t line_len; @@ -2060,7 +2068,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) * See disasm_line__new() and struct disasm_line::line_nr. */ if (symbol__parse_objdump_line(sym, args, expanded_line, - &lineno) < 0) + &lineno, &fileloc) < 0) break; nline++; } @@ -3144,6 +3152,10 @@ static int annotation__config(const char *var, const char *value, void *data) opt->use_offset = perf_config_bool("use_offset", value); } else if (!strcmp(var, "annotate.disassembler_style")) { opt->disassembler_style = value; + } else if (!strcmp(var, "annotate.demangle")) { + symbol_conf.demangle = perf_config_bool("demangle", value); + } else if (!strcmp(var, "annotate.demangle_kernel")) { + symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value); } else { pr_debug("%s variable unknown, ignoring...", var); } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 096cdaf21b01..3757416bcf46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -84,6 +84,7 @@ struct annotation_options { print_lines, full_path, show_linenr, + show_fileloc, show_nr_jumps, show_minmax_cycle, show_asm_raw, @@ -136,6 +137,7 @@ struct annotation_line { s64 offset; char *line; int line_nr; + char *fileloc; int jump_sources; float ipc; u64 cycles; diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 9087f1bffd3d..fbb3c4057c30 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -671,7 +671,7 @@ int bpf__probe(struct bpf_object *obj) * After probing, let's consider prologue, which * adds program fetcher to BPF programs. * - * hook_load_preprocessorr() hooks pre-processor + * hook_load_preprocessor() hooks pre-processor * to bpf_program, let it generate prologue * dynamically during loading. */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 04f89120b323..81d1df3c4ec0 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -5,6 +5,7 @@ #include <assert.h> #include <limits.h> #include <unistd.h> +#include <sys/file.h> #include <sys/time.h> #include <sys/resource.h> #include <linux/err.h> @@ -12,14 +13,45 @@ #include <bpf/bpf.h> #include <bpf/btf.h> #include <bpf/libbpf.h> +#include <api/fs/fs.h> #include "bpf_counter.h" #include "counts.h" #include "debug.h" #include "evsel.h" +#include "evlist.h" #include "target.h" +#include "cpumap.h" +#include "thread_map.h" #include "bpf_skel/bpf_prog_profiler.skel.h" +#include "bpf_skel/bperf_u.h" +#include "bpf_skel/bperf_leader.skel.h" +#include "bpf_skel/bperf_follower.skel.h" + +/* + * bperf uses a hashmap, the attr_map, to track all the leader programs. + * The hashmap is pinned in bpffs. flock() on this file is used to ensure + * no concurrent access to the attr_map. The key of attr_map is struct + * perf_event_attr, and the value is struct perf_event_attr_map_entry. + * + * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the + * leader prog, and the diff_map. Each perf-stat session holds a reference + * to the bpf_link to make sure the leader prog is attached to sched_switch + * tracepoint. + * + * Since the hashmap only contains IDs of the bpf_link and diff_map, it + * does not hold any references to the leader program. Once all perf-stat + * sessions of these events exit, the leader prog, its maps, and the + * perf_events will be freed. + */ +struct perf_event_attr_map_entry { + __u32 link_id; + __u32 diff_map_id; +}; + +#define DEFAULT_ATTR_MAP_PATH "fs/bpf/perf_attr_map" +#define ATTR_MAP_SIZE 16 static inline void *u64_to_ptr(__u64 ptr) { @@ -274,17 +306,494 @@ struct bpf_counter_ops bpf_program_profiler_ops = { .install_pe = bpf_program_profiler__install_pe, }; +static __u32 bpf_link_get_id(int fd) +{ + struct bpf_link_info link_info = {0}; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.id; +} + +static __u32 bpf_link_get_prog_id(int fd) +{ + struct bpf_link_info link_info = {0}; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.prog_id; +} + +static __u32 bpf_map_get_id(int fd) +{ + struct bpf_map_info map_info = {0}; + __u32 map_info_len = sizeof(map_info); + + bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); + return map_info.id; +} + +static int bperf_lock_attr_map(struct target *target) +{ + char path[PATH_MAX]; + int map_fd, err; + + if (target->attr_map) { + scnprintf(path, PATH_MAX, "%s", target->attr_map); + } else { + scnprintf(path, PATH_MAX, "%s/%s", sysfs__mountpoint(), + DEFAULT_ATTR_MAP_PATH); + } + + if (access(path, F_OK)) { + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(struct perf_event_attr), + sizeof(struct perf_event_attr_map_entry), + ATTR_MAP_SIZE, 0); + if (map_fd < 0) + return -1; + + err = bpf_obj_pin(map_fd, path); + if (err) { + /* someone pinned the map in parallel? */ + close(map_fd); + map_fd = bpf_obj_get(path); + if (map_fd < 0) + return -1; + } + } else { + map_fd = bpf_obj_get(path); + if (map_fd < 0) + return -1; + } + + err = flock(map_fd, LOCK_EX); + if (err) { + close(map_fd); + return -1; + } + return map_fd; +} + +/* trigger the leader program on a cpu */ +static int bperf_trigger_reading(int prog_fd, int cpu) +{ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, + .ctx_size_in = 0, + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + .retval = 0, + ); + + return bpf_prog_test_run_opts(prog_fd, &opts); +} + +static int bperf_check_target(struct evsel *evsel, + struct target *target, + enum bperf_filter_type *filter_type, + __u32 *filter_entry_cnt) +{ + if (evsel->leader->core.nr_members > 1) { + pr_err("bpf managed perf events do not yet support groups.\n"); + return -1; + } + + /* determine filter type based on target */ + if (target->system_wide) { + *filter_type = BPERF_FILTER_GLOBAL; + *filter_entry_cnt = 1; + } else if (target->cpu_list) { + *filter_type = BPERF_FILTER_CPU; + *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel)); + } else if (target->tid) { + *filter_type = BPERF_FILTER_PID; + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); + } else if (target->pid || evsel->evlist->workload.pid != -1) { + *filter_type = BPERF_FILTER_TGID; + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); + } else { + pr_err("bpf managed perf events do not yet support these targets.\n"); + return -1; + } + + return 0; +} + +static struct perf_cpu_map *all_cpu_map; + +static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, + struct perf_event_attr_map_entry *entry) +{ + struct bperf_leader_bpf *skel = bperf_leader_bpf__open(); + int link_fd, diff_map_fd, err; + struct bpf_link *link = NULL; + + if (!skel) { + pr_err("Failed to open leader skeleton\n"); + return -1; + } + + bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); + err = bperf_leader_bpf__load(skel); + if (err) { + pr_err("Failed to load leader skeleton\n"); + goto out; + } + + err = -1; + link = bpf_program__attach(skel->progs.on_switch); + if (!link) { + pr_err("Failed to attach leader program\n"); + goto out; + } + + link_fd = bpf_link__fd(link); + diff_map_fd = bpf_map__fd(skel->maps.diff_readings); + entry->link_id = bpf_link_get_id(link_fd); + entry->diff_map_id = bpf_map_get_id(diff_map_fd); + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY); + assert(err == 0); + + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id); + assert(evsel->bperf_leader_link_fd >= 0); + + /* + * save leader_skel for install_pe, which is called within + * following evsel__open_per_cpu call + */ + evsel->leader_skel = skel; + evsel__open_per_cpu(evsel, all_cpu_map, -1); + +out: + bperf_leader_bpf__destroy(skel); + bpf_link__destroy(link); + return err; +} + +static int bperf__load(struct evsel *evsel, struct target *target) +{ + struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; + int attr_map_fd, diff_map_fd = -1, err; + enum bperf_filter_type filter_type; + __u32 filter_entry_cnt, i; + + if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) + return -1; + + if (!all_cpu_map) { + all_cpu_map = perf_cpu_map__new(NULL); + if (!all_cpu_map) + return -1; + } + + evsel->bperf_leader_prog_fd = -1; + evsel->bperf_leader_link_fd = -1; + + /* + * Step 1: hold a fd on the leader program and the bpf_link, if + * the program is not already gone, reload the program. + * Use flock() to ensure exclusive access to the perf_event_attr + * map. + */ + attr_map_fd = bperf_lock_attr_map(target); + if (attr_map_fd < 0) { + pr_err("Failed to lock perf_event_attr map\n"); + return -1; + } + + err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry); + if (err) { + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY); + if (err) + goto out; + } + + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id); + if (evsel->bperf_leader_link_fd < 0 && + bperf_reload_leader_program(evsel, attr_map_fd, &entry)) + goto out; + + /* + * The bpf_link holds reference to the leader program, and the + * leader program holds reference to the maps. Therefore, if + * link_id is valid, diff_map_id should also be valid. + */ + evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id( + bpf_link_get_prog_id(evsel->bperf_leader_link_fd)); + assert(evsel->bperf_leader_prog_fd >= 0); + + diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id); + assert(diff_map_fd >= 0); + + /* + * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check + * whether the kernel support it + */ + err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0); + if (err) { + pr_err("The kernel does not support test_run for raw_tp BPF programs.\n" + "Therefore, --use-bpf might show inaccurate readings\n"); + goto out; + } + + /* Step 2: load the follower skeleton */ + evsel->follower_skel = bperf_follower_bpf__open(); + if (!evsel->follower_skel) { + pr_err("Failed to open follower skeleton\n"); + goto out; + } + + /* attach fexit program to the leader program */ + bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX, + evsel->bperf_leader_prog_fd, "on_switch"); + + /* connect to leader diff_reading map */ + bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd); + + /* set up reading map */ + bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, + filter_entry_cnt); + /* set up follower filter based on target */ + bpf_map__set_max_entries(evsel->follower_skel->maps.filter, + filter_entry_cnt); + err = bperf_follower_bpf__load(evsel->follower_skel); + if (err) { + pr_err("Failed to load follower skeleton\n"); + bperf_follower_bpf__destroy(evsel->follower_skel); + evsel->follower_skel = NULL; + goto out; + } + + for (i = 0; i < filter_entry_cnt; i++) { + int filter_map_fd; + __u32 key; + + if (filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) + key = evsel->core.threads->map[i].pid; + else if (filter_type == BPERF_FILTER_CPU) + key = evsel->core.cpus->map[i]; + else + break; + + filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); + bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + } + + evsel->follower_skel->bss->type = filter_type; + + err = bperf_follower_bpf__attach(evsel->follower_skel); + +out: + if (err && evsel->bperf_leader_link_fd >= 0) + close(evsel->bperf_leader_link_fd); + if (err && evsel->bperf_leader_prog_fd >= 0) + close(evsel->bperf_leader_prog_fd); + if (diff_map_fd >= 0) + close(diff_map_fd); + + flock(attr_map_fd, LOCK_UN); + close(attr_map_fd); + + return err; +} + +static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) +{ + struct bperf_leader_bpf *skel = evsel->leader_skel; + + return bpf_map_update_elem(bpf_map__fd(skel->maps.events), + &cpu, &fd, BPF_ANY); +} + +/* + * trigger the leader prog on each cpu, so the accum_reading map could get + * the latest readings. + */ +static int bperf_sync_counters(struct evsel *evsel) +{ + int num_cpu, i, cpu; + + num_cpu = all_cpu_map->nr; + for (i = 0; i < num_cpu; i++) { + cpu = all_cpu_map->map[i]; + bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); + } + return 0; +} + +static int bperf__enable(struct evsel *evsel) +{ + evsel->follower_skel->bss->enabled = 1; + return 0; +} + +static int bperf__read(struct evsel *evsel) +{ + struct bperf_follower_bpf *skel = evsel->follower_skel; + __u32 num_cpu_bpf = cpu__max_cpu(); + struct bpf_perf_event_value values[num_cpu_bpf]; + int reading_map_fd, err = 0; + __u32 i, j, num_cpu; + + bperf_sync_counters(evsel); + reading_map_fd = bpf_map__fd(skel->maps.accum_readings); + + for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + __u32 cpu; + + err = bpf_map_lookup_elem(reading_map_fd, &i, values); + if (err) + goto out; + switch (evsel->follower_skel->bss->type) { + case BPERF_FILTER_GLOBAL: + assert(i == 0); + + num_cpu = all_cpu_map->nr; + for (j = 0; j < num_cpu; j++) { + cpu = all_cpu_map->map[j]; + perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; + perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; + perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; + } + break; + case BPERF_FILTER_CPU: + cpu = evsel->core.cpus->map[i]; + perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; + perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; + perf_counts(evsel->counts, i, 0)->run = values[cpu].running; + break; + case BPERF_FILTER_PID: + case BPERF_FILTER_TGID: + perf_counts(evsel->counts, 0, i)->val = 0; + perf_counts(evsel->counts, 0, i)->ena = 0; + perf_counts(evsel->counts, 0, i)->run = 0; + + for (cpu = 0; cpu < num_cpu_bpf; cpu++) { + perf_counts(evsel->counts, 0, i)->val += values[cpu].counter; + perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled; + perf_counts(evsel->counts, 0, i)->run += values[cpu].running; + } + break; + default: + break; + } + } +out: + return err; +} + +static int bperf__destroy(struct evsel *evsel) +{ + bperf_follower_bpf__destroy(evsel->follower_skel); + close(evsel->bperf_leader_prog_fd); + close(evsel->bperf_leader_link_fd); + return 0; +} + +/* + * bperf: share hardware PMCs with BPF + * + * perf uses performance monitoring counters (PMC) to monitor system + * performance. The PMCs are limited hardware resources. For example, + * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu. + * + * Modern data center systems use these PMCs in many different ways: + * system level monitoring, (maybe nested) container level monitoring, per + * process monitoring, profiling (in sample mode), etc. In some cases, + * there are more active perf_events than available hardware PMCs. To allow + * all perf_events to have a chance to run, it is necessary to do expensive + * time multiplexing of events. + * + * On the other hand, many monitoring tools count the common metrics + * (cycles, instructions). It is a waste to have multiple tools create + * multiple perf_events of "cycles" and occupy multiple PMCs. + * + * bperf tries to reduce such wastes by allowing multiple perf_events of + * "cycles" or "instructions" (at different scopes) to share PMUs. Instead + * of having each perf-stat session to read its own perf_events, bperf uses + * BPF programs to read the perf_events and aggregate readings to BPF maps. + * Then, the perf-stat session(s) reads the values from these BPF maps. + * + * || + * shared progs and maps <- || -> per session progs and maps + * || + * --------------- || + * | perf_events | || + * --------------- fexit || ----------------- + * | --------||----> | follower prog | + * --------------- / || --- ----------------- + * cs -> | leader prog |/ ||/ | | + * --> --------------- /|| -------------- ------------------ + * / | | / || | filter map | | accum_readings | + * / ------------ ------------ || -------------- ------------------ + * | | prev map | | diff map | || | + * | ------------ ------------ || | + * \ || | + * = \ ==================================================== | ============ + * \ / user space + * \ / + * \ / + * BPF_PROG_TEST_RUN BPF_MAP_LOOKUP_ELEM + * \ / + * \ / + * \------ perf-stat ----------------------/ + * + * The figure above shows the architecture of bperf. Note that the figure + * is divided into 3 regions: shared progs and maps (top left), per session + * progs and maps (top right), and user space (bottom). + * + * The leader prog is triggered on each context switch (cs). The leader + * prog reads perf_events and stores the difference (current_reading - + * previous_reading) to the diff map. For the same metric, e.g. "cycles", + * multiple perf-stat sessions share the same leader prog. + * + * Each perf-stat session creates a follower prog as fexit program to the + * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38) + * follower progs to the same leader prog. The follower prog checks current + * task and processor ID to decide whether to add the value from the diff + * map to its accumulated reading map (accum_readings). + * + * Finally, perf-stat user space reads the value from accum_reading map. + * + * Besides context switch, it is also necessary to trigger the leader prog + * before perf-stat reads the value. Otherwise, the accum_reading map may + * not have the latest reading from the perf_events. This is achieved by + * triggering the event via sys_bpf(BPF_PROG_TEST_RUN) to each CPU. + * + * Comment before the definition of struct perf_event_attr_map_entry + * describes how different sessions of perf-stat share information about + * the leader prog. + */ + +struct bpf_counter_ops bperf_ops = { + .load = bperf__load, + .enable = bperf__enable, + .read = bperf__read, + .install_pe = bperf__install_pe, + .destroy = bperf__destroy, +}; + +static inline bool bpf_counter_skip(struct evsel *evsel) +{ + return list_empty(&evsel->bpf_counter_list) && + evsel->follower_skel == NULL; +} + int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return 0; return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); } int bpf_counter__load(struct evsel *evsel, struct target *target) { - if (target__has_bpf(target)) + if (target->bpf_str) evsel->bpf_counter_ops = &bpf_program_profiler_ops; + else if (target->use_bpf) + evsel->bpf_counter_ops = &bperf_ops; if (evsel->bpf_counter_ops) return evsel->bpf_counter_ops->load(evsel, target); @@ -293,21 +802,21 @@ int bpf_counter__load(struct evsel *evsel, struct target *target) int bpf_counter__enable(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return 0; return evsel->bpf_counter_ops->enable(evsel); } int bpf_counter__read(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return -EAGAIN; return evsel->bpf_counter_ops->read(evsel); } void bpf_counter__destroy(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return; evsel->bpf_counter_ops->destroy(evsel); evsel->bpf_counter_ops = NULL; diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 2eca210e5dc1..cb9c532e0a07 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -38,7 +38,7 @@ int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); #else /* HAVE_BPF_SKEL */ -#include<linux/err.h> +#include <linux/err.h> static inline int bpf_counter__load(struct evsel *evsel __maybe_unused, struct target *target __maybe_unused) diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h new file mode 100644 index 000000000000..186a5551ddb9 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook + +#ifndef __BPERF_STAT_H +#define __BPERF_STAT_H + +typedef struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} reading_map; + +#endif /* __BPERF_STAT_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c new file mode 100644 index 000000000000..b8fa3cb2da23 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bperf.h" +#include "bperf_u.h" + +reading_map diff_readings SEC(".maps"); +reading_map accum_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} filter SEC(".maps"); + +enum bperf_filter_type type = 0; +int enabled = 0; + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value *diff_val, *accum_val; + __u32 filter_key, zero = 0; + __u32 *accum_key; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_GLOBAL: + accum_key = &zero; + goto do_add; + case BPERF_FILTER_CPU: + filter_key = bpf_get_smp_processor_id(); + break; + case BPERF_FILTER_PID: + filter_key = bpf_get_current_pid_tgid() & 0xffffffff; + break; + case BPERF_FILTER_TGID: + filter_key = bpf_get_current_pid_tgid() >> 32; + break; + default: + return 0; + } + + accum_key = bpf_map_lookup_elem(&filter, &filter_key); + if (!accum_key) + return 0; + +do_add: + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); + if (!diff_val) + return 0; + + accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + if (!accum_val) + return 0; + + accum_val->counter += diff_val->counter; + accum_val->enabled += diff_val->enabled; + accum_val->running += diff_val->running; + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c new file mode 100644 index 000000000000..4f70d1459e86 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +#include <linux/bpf.h> +#include <linux/perf_event.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bperf.h" + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} events SEC(".maps"); + +reading_map prev_readings SEC(".maps"); +reading_map diff_readings SEC(".maps"); + +SEC("raw_tp/sched_switch") +int BPF_PROG(on_switch) +{ + struct bpf_perf_event_value val, *prev_val, *diff_val; + __u32 key = bpf_get_smp_processor_id(); + __u32 zero = 0; + long err; + + prev_val = bpf_map_lookup_elem(&prev_readings, &zero); + if (!prev_val) + return 0; + + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); + if (!diff_val) + return 0; + + err = bpf_perf_event_read_value(&events, key, &val, sizeof(val)); + if (err) + return 0; + + diff_val->counter = val.counter - prev_val->counter; + diff_val->enabled = val.enabled - prev_val->enabled; + diff_val->running = val.running - prev_val->running; + *prev_val = val; + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h new file mode 100644 index 000000000000..1ce0c2c905c1 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook + +#ifndef __BPERF_STAT_U_H +#define __BPERF_STAT_U_H + +enum bperf_filter_type { + BPERF_FILTER_GLOBAL = 1, + BPERF_FILTER_CPU, + BPERF_FILTER_PID, + BPERF_FILTER_TGID, +}; + +#endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index c7cec92d0236..ab12b4c4ece2 100644 --- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -52,7 +52,7 @@ int BPF_PROG(fentry_XXX) static inline void fexit_update_maps(struct bpf_perf_event_value *after) { - struct bpf_perf_event_value *before, diff, *accum; + struct bpf_perf_event_value *before, diff; __u32 zero = 0; before = bpf_map_lookup_elem(&fentry_readings, &zero); @@ -78,7 +78,6 @@ int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value reading; __u32 cpu = bpf_get_smp_processor_id(); - __u32 one = 1, zero = 0; int err; /* read all events before updating the maps, to reduce error */ diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h index 6b3229106f16..5875cfc8106e 100644 --- a/tools/perf/util/call-path.h +++ b/tools/perf/util/call-path.h @@ -23,7 +23,7 @@ * @children: tree of call paths of functions called * * In combination with the call_return structure, the call_path structure - * defines a context-sensitve call-graph. + * defines a context-sensitive call-graph. */ struct call_path { struct call_path *parent; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1b60985690bb..8e2777133bd9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -877,7 +877,7 @@ append_chain_children(struct callchain_node *root, if (!node) return -1; - /* lookup in childrens */ + /* lookup in children */ while (*p) { enum match_result ret; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6984c77068a3..6bcb5ef221f8 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -457,6 +457,9 @@ static int perf_stat_config(const char *var, const char *value) if (!strcmp(var, "stat.big-num")) perf_stat__set_big_num(perf_config_bool(var, value)); + if (!strcmp(var, "stat.no-csv-summary")) + perf_stat__set_no_csv_summary(perf_config_bool(var, value)); + /* Add other config variables here. */ return 0; } @@ -699,7 +702,7 @@ static int collect_config(const char *var, const char *value, /* perf_config_set can contain both user and system config items. * So we should know where each value is from. * The classification would be needed when a particular config file - * is overwrited by setting feature i.e. set_config(). + * is overwritten by setting feature i.e. set_config(). */ if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) { section->from_system_config = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3f4bc4050477..059bcec3f651 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,6 +6,7 @@ * Author: Mathieu Poirier <mathieu.poirier@linaro.org> */ +#include <linux/coresight-pmu.h> #include <linux/err.h> #include <linux/list.h> #include <linux/zalloc.h> @@ -316,7 +317,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * This is the first timestamp we've seen since the beginning of traces * or a discontinuity. Since timestamps packets are generated *after* * range packets have been generated, we need to estimate the time at - * which instructions started by substracting the number of instructions + * which instructions started by subtracting the number of instructions * executed to the timestamp. */ packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; @@ -491,13 +492,42 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { - pid_t tid; + pid_t tid = -1; + static u64 pid_fmt; + int ret; - /* Ignore PE_CONTEXT packets that don't have a valid contextID */ - if (!elem->context.ctxt_id_valid) + /* + * As all the ETMs run at the same exception level, the system should + * have the same PID format crossing CPUs. So cache the PID format + * and reuse it for sequential decoding. + */ + if (!pid_fmt) { + ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt); + if (ret) + return OCSD_RESP_FATAL_SYS_ERR; + } + + /* + * Process the PE_CONTEXT packets if we have a valid contextID or VMID. + * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 + * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. + */ + switch (pid_fmt) { + case BIT(ETM_OPT_CTXTID): + if (elem->context.ctxt_id_valid) + tid = elem->context.context_id; + break; + case BIT(ETM_OPT_CTXTID2): + if (elem->context.vmid_valid) + tid = elem->context.vmid; + break; + default: + break; + } + + if (tid == -1) return OCSD_RESP_CONT; - tid = elem->context.context_id; if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) return OCSD_RESP_FATAL_SYS_ERR; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a2a369e2fbb6..7e63e7dedc33 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -7,6 +7,7 @@ */ #include <linux/bitops.h> +#include <linux/coresight-pmu.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/log2.h> @@ -156,11 +157,52 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +/* + * The returned PID format is presented by two bits: + * + * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; + * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. + * + * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * are enabled at the same time when the session runs on an EL2 kernel. + * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be + * recorded in the trace data, the tool will selectively use + * CONTEXTIDR_EL2 as PID. + */ +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) +{ + struct int_node *inode; + u64 *metadata, val; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + val = metadata[CS_ETM_ETMCR]; + /* CONTEXTIDR is traced */ + if (val & BIT(ETM_OPT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } else { + val = metadata[CS_ETMV4_TRCCONFIGR]; + /* CONTEXTIDR_EL2 is traced */ + if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + *pid_fmt = BIT(ETM_OPT_CTXTID2); + /* CONTEXTIDR_EL1 is traced */ + else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } + + return 0; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { /* - * Wnen a timestamp packet is encountered the backend code + * When a timestamp packet is encountered the backend code * is stopped so that the front end has time to process packets * that were accumulated in the traceID queue. Since there can * be more than one channel per cs_etm_queue, we need to specify @@ -1655,7 +1697,7 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * | 1 1 0 1 1 1 1 1 | imm8 | * +-----------------+--------+ * - * According to the specifiction, it only defines SVC for T32 + * According to the specification, it only defines SVC for T32 * with 16 bits instruction and has no definition for 32bits; * so below only read 2 bytes as instruction size for T32. */ @@ -1887,7 +1929,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, /* * If the previous packet is an exception return packet - * and the return address just follows SVC instuction, + * and the return address just follows SVC instruction, * it needs to calibrate the previous packet sample flags * as PERF_IP_FLAG_SYSCALLRET. */ @@ -1961,7 +2003,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, * contain exception type related info so we cannot decide * the exception type purely based on exception return packet. * If we record the exception number from exception packet and - * reuse it for excpetion return packet, this is not reliable + * reuse it for exception return packet, this is not reliable * due the trace can be discontinuity or the interrupt can * be nested, thus the recorded exception number cannot be * used for exception return packet for these two cases. @@ -2435,7 +2477,7 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) } static const char * const cs_etm_global_header_fmts[] = { - [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_HEADER_VERSION] = " Header version %llx\n", [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", }; @@ -2443,6 +2485,7 @@ static const char * const cs_etm_global_header_fmts[] = { static const char * const cs_etm_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETM_ETMCR] = " ETMCR %llx\n", [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", [CS_ETM_ETMCCER] = " ETMCCER %llx\n", @@ -2452,6 +2495,7 @@ static const char * const cs_etm_priv_fmts[] = { static const char * const cs_etmv4_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", @@ -2461,26 +2505,167 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", }; -static void cs_etm__print_auxtrace_info(__u64 *val, int num) +static const char * const param_unk_fmt = + " Unknown parameter [%d] %llx\n"; +static const char * const magic_unk_fmt = + " Magic number Unknown %llx\n"; + +static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) { - int i, j, cpu = 0; + int i = *offset, j, nr_params = 0, fmt_offset; + __u64 magic; - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) - fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + /* check magic value */ + magic = val[i + CS_ETM_MAGIC]; + if ((magic != __perf_cs_etmv3_magic) && + (magic != __perf_cs_etmv4_magic)) { + /* failure - note bad magic value */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + + /* print common header block */ + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); + + if (magic == __perf_cs_etmv3_magic) { + nr_params = CS_ETM_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETM_ETMCR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } else if (magic == __perf_cs_etmv4_magic) { + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETMV4_TRCCONFIGR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + *offset = i; + return 0; +} + +static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) +{ + int i = *offset, j, total_params = 0; + __u64 magic; + + magic = val[i + CS_ETM_MAGIC]; + /* total params to print is NR_PARAMS + common block size for v1 */ + total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; - for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { - if (val[i] == __perf_cs_etmv3_magic) - for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + if (magic == __perf_cs_etmv3_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETM_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - else if (val[i] == __perf_cs_etmv4_magic) - for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + } + } else if (magic == __perf_cs_etmv4_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETMV4_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - else - /* failure.. return */ + } + } else { + /* failure - note bad magic value and error out */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + *offset = i; + return 0; +} + +static void cs_etm__print_auxtrace_info(__u64 *val, int num) +{ + int i, cpu = 0, version, err; + + /* bail out early on bad header version */ + version = val[0]; + if (version > CS_HEADER_CURRENT_VERSION) { + /* failure.. return */ + fprintf(stdout, " Unknown Header Version = %x, ", version); + fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); + return; + } + + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { + if (version == 0) + err = cs_etm__print_cpu_metadata_v0(val, &i); + else if (version == 1) + err = cs_etm__print_cpu_metadata_v1(val, &i); + if (err) return; } } +/* + * Read a single cpu parameter block from the auxtrace_info priv block. + * + * For version 1 there is a per cpu nr_params entry. If we are handling + * version 1 file, then there may be less, the same, or more params + * indicated by this value than the compile time number we understand. + * + * For a version 0 info block, there are a fixed number, and we need to + * fill out the nr_param value in the metadata we create. + */ +static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, + int out_blk_size, int nr_params_v0) +{ + u64 *metadata = NULL; + int hdr_version; + int nr_in_params, nr_out_params, nr_cmn_params; + int i, k; + + metadata = zalloc(sizeof(*metadata) * out_blk_size); + if (!metadata) + return NULL; + + /* read block current index & version */ + i = *buff_in_offset; + hdr_version = buff_in[CS_HEADER_VERSION]; + + if (!hdr_version) { + /* read version 0 info block into a version 1 metadata block */ + nr_in_params = nr_params_v0; + metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; + metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; + metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; + /* remaining block params at offset +1 from source */ + for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) + metadata[k + 1] = buff_in[i + k]; + /* version 0 has 2 common params */ + nr_cmn_params = 2; + } else { + /* read version 1 info block - input and output nr_params may differ */ + /* version 1 has 3 common params */ + nr_cmn_params = 3; + nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; + + /* if input has more params than output - skip excess */ + nr_out_params = nr_in_params + nr_cmn_params; + if (nr_out_params > out_blk_size) + nr_out_params = out_blk_size; + + for (k = CS_ETM_MAGIC; k < nr_out_params; k++) + metadata[k] = buff_in[i + k]; + + /* record the actual nr params we copied */ + metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; + } + + /* adjust in offset by number of in params used */ + i += nr_in_params + nr_cmn_params; + *buff_in_offset = i; + return metadata; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2492,11 +2677,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, int info_header_size; int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu; - int err = 0, idx = -1; - int i, j, k; + int num_cpu, trcidr_idx; + int err = 0; + int i, j; u64 *ptr, *hdr = NULL; u64 **metadata = NULL; + u64 hdr_version; /* * sizeof(auxtrace_info_event::type) + @@ -2512,16 +2698,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* First the global part */ ptr = (u64 *) auxtrace_info->priv; - /* Look for version '0' of the header */ - if (ptr[0] != 0) + /* Look for version of the header */ + hdr_version = ptr[0]; + if (hdr_version > CS_HEADER_CURRENT_VERSION) { + /* print routine will print an error on bad version */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); return -EINVAL; + } - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); if (!hdr) return -ENOMEM; /* Extract header information - see cs-etm.h for format */ - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) hdr[i] = ptr[i]; num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & @@ -2552,35 +2743,31 @@ int cs_etm__process_auxtrace_info(union perf_event *event, */ for (j = 0; j < num_cpu; j++) { if (ptr[i] == __perf_cs_etmv3_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETM_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETM_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETM_PRIV_MAX, + CS_ETM_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETM_ETMTRACEIDR]; - i += CS_ETM_PRIV_MAX; + trcidr_idx = CS_ETM_ETMTRACEIDR; + } else if (ptr[i] == __perf_cs_etmv4_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETMV4_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETMV4_PRIV_MAX, + CS_ETMV4_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; - i += CS_ETMV4_PRIV_MAX; + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } + + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; } /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, idx); + inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); /* Something went wrong, no need to continue */ if (!inode) { @@ -2601,7 +2788,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, } /* - * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and * CS_ETMV4_PRIV_MAX mark how many double words are in the * global metadata, and each cpu's metadata respectively. * The following tests if the correct number of double words was @@ -2703,6 +2890,12 @@ err_free_traceid_list: intlist__delete(traceid_list); err_free_hdr: zfree(&hdr); - + /* + * At this point, as a minimum we have valid header. Dump the rest of + * the info section - the print routines will error out on structural + * issues. + */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 4ad925d6d799..36428918411e 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -12,28 +12,43 @@ struct perf_session; -/* Versionning header in case things need tro change in the future. That way +/* + * Versioning header in case things need to change in the future. That way * decoding of old snapshot is still possible. */ enum { /* Starting with 0x0 */ - CS_HEADER_VERSION_0, + CS_HEADER_VERSION, /* PMU->type (32 bit), total # of CPUs (32 bit) */ CS_PMU_TYPE_CPUS, CS_ETM_SNAPSHOT, - CS_HEADER_VERSION_0_MAX, + CS_HEADER_VERSION_MAX, }; +/* + * Update the version for new format. + * + * New version 1 format adds a param count to the per cpu metadata. + * This allows easy adding of new metadata parameters. + * Requires that new params always added after current ones. + * Also allows client reader to handle file versions that are different by + * checking the number of params in the file vs the number expected. + */ +#define CS_HEADER_CURRENT_VERSION 1 + /* Beginning of header common to both ETMv3 and V4 */ enum { CS_ETM_MAGIC, CS_ETM_CPU, + /* Number of trace config params in following ETM specific block */ + CS_ETM_NR_TRC_PARAMS, + CS_ETM_COMMON_BLK_MAX_V1, }; /* ETMv3/PTM metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETM_ETMCR = CS_ETM_CPU + 1, + CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETM_ETMTRACEIDR, /* RO, taken from sysFS */ CS_ETM_ETMCCER, @@ -41,10 +56,13 @@ enum { CS_ETM_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1) + /* ETMv4 metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1, + CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETMV4_TRCTRACEIDR, /* RO, taken from sysFS */ CS_ETMV4_TRCIDR0, @@ -55,9 +73,12 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1) + /* * ETMv3 exception encoding number: - * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) + * See Embedded Trace Macrocell specification (ARM IHI 0014Q) * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. */ enum { @@ -162,7 +183,7 @@ struct cs_etm_packet_queue { #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) -#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) +#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64)) #define __perf_cs_etmv3_magic 0x3030303030303030ULL #define __perf_cs_etmv4_magic 0x4040404040404040ULL @@ -173,6 +194,7 @@ struct cs_etm_packet_queue { int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt); int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id); bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 8b67bd97d122..a9c375e63e73 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -949,7 +949,7 @@ static char *change_name(char *name, char *orig_name, int dup) /* * Add '_' prefix to potential keywork. According to * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com), - * futher CTF spec updating may require us to use '$'. + * further CTF spec updating may require us to use '$'. */ if (dup < 0) len = strlen(name) + sizeof("_"); diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 39c05200ed65..ddf33d58bcd3 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -147,7 +147,7 @@ error: * Demangle Java function signature (openJDK, not GCJ) * input: * str: string to parse. String is not modified - * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling + * flags: combination of JAVA_DEMANGLE_* flags to modify demangling * return: * if input can be demangled, then a newly allocated string is returned. * if input cannot be demangled, then NULL is returned @@ -164,7 +164,7 @@ java_demangle_sym(const char *str, int flags) if (!str) return NULL; - /* find start of retunr type */ + /* find start of return type */ p = strrchr(str, ')'); if (!p) return NULL; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index cd2fe64a3c5d..52e7101c5609 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -216,7 +216,7 @@ struct dso { /* dso__for_each_symbol - iterate over the symbols of given type * - * @dso: the 'struct dso *' in which symbols itereated + * @dso: the 'struct dso *' in which symbols are iterated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage */ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 7b2d471a6419..b2f4920e19a6 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -91,7 +91,7 @@ static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr) return NULL; } while (laddr == addr); l++; - /* Going foward to find the statement line */ + /* Going forward to find the statement line */ do { line = dwarf_onesrcline(lines, l++); if (!line || dwarf_lineaddr(line, &laddr) != 0 || @@ -177,7 +177,7 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, * die_get_linkage_name - Get the linkage name of the object * @dw_die: A DIE of the object * - * Get the linkage name attiribute of given @dw_die. + * Get the linkage name attribute of given @dw_die. * For C++ binary, the linkage name will be the mangled symbol. */ const char *die_get_linkage_name(Dwarf_Die *dw_die) @@ -739,7 +739,7 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) * @data: user data * * Walk on the instances of give @in_die. @in_die must be an inlined function - * declartion. This returns the return value of @callback if it returns + * declaration. This returns the return value of @callback if it returns * non-zero value, or -ENOENT if there is no instance. */ int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *), diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 506006e0cf66..cb99646843a9 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -22,7 +22,7 @@ const char *cu_get_comp_dir(Dwarf_Die *cu_die); int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, const char **fname, int *lineno); -/* Walk on funcitons at given address */ +/* Walk on functions at given address */ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, int (*callback)(Dwarf_Die *, void *), void *data); diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 1b49ecee5aff..3fa4486742cd 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -24,6 +24,7 @@ #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" +#include "../arch/mips/include/dwarf-regs-table.h" #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) @@ -53,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(sparc_regstr_tbl, n); case EM_XTENSA: return __get_dwarf_regstr(xtensa_regstr_tbl, n); + case EM_MIPS: + return __get_dwarf_regstr(mips_regstr_tbl, n); default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f603edbbbc6f..8a62fb39e365 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -147,6 +147,7 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; + u16 p_stage_cyc; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; @@ -427,5 +428,7 @@ char *get_page_size_name(u64 size, char *str); void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); +const char *arch_perf_header_entry(const char *se_header); +int arch_support_sort_key(const char *sort_key); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 859cb34fcff2..631a4af2ed86 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -21,7 +21,7 @@ * all struct perf_record_lost_samples.lost fields reported. * * The total_period is needed because by default auto-freq is used, so - * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get + * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get * the total number of low level events, it is necessary to to sum all struct * perf_record_sample.period and stash the result in total_period. */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 882cd1f721d9..f1c79ecf8107 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -36,6 +36,7 @@ #include <fcntl.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <linux/bitops.h> #include <linux/hash.h> @@ -1209,7 +1210,7 @@ bool evlist__valid_read_format(struct evlist *evlist) } } - /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ + /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */ if ((sample_type & PERF_SAMPLE_READ) && !(read_format & PERF_FORMAT_ID)) { return false; @@ -1406,6 +1407,13 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); /* + * Change the name of this process not to confuse --exclude-perf users + * that sees 'perf' in the window up to the execvp() and thinks that + * perf samples are not being excluded. + */ + prctl(PR_SET_NAME, "perf-exec"); + + /* * Tell the parent we're ready to go */ close(child_ready_pipe[1]); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7ecbc8e2fbfa..2d2614eeaa20 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -621,7 +621,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_AL #define COP(x) (1 << x) /* - * cache operartion stat + * cache operation stat * L1I : Read and prefetch only * ITLB and BPU : Read-only */ @@ -2275,7 +2275,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, /* * Undo swap of u64, then swap on individual u32s, * get the size of the raw area and undo all of the - * swap. The pevent interface handles endianity by + * swap. The pevent interface handles endianness by * itself. */ if (swapped) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6026487353dd..dd4f56f9cfdf 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -20,6 +20,8 @@ union perf_event; struct bpf_counter_ops; struct target; struct hashmap; +struct bperf_leader_bpf; +struct bperf_follower_bpf; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -130,8 +132,24 @@ struct evsel { * See also evsel__has_callchain(). */ __u64 synth_sample_type; - struct list_head bpf_counter_list; + + /* + * bpf_counter_ops serves two use cases: + * 1. perf-stat -b counting events used byBPF programs + * 2. perf-stat --use-bpf use BPF programs to aggregate counts + */ struct bpf_counter_ops *bpf_counter_ops; + + /* for perf-stat -b */ + struct list_head bpf_counter_list; + + /* for perf-stat --use-bpf */ + int bperf_leader_prog_fd; + int bperf_leader_link_fd; + union { + struct bperf_leader_bpf *leader_skel; + struct bperf_follower_bpf *follower_skel; + }; }; struct perf_missing_features { diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index dcf8d19b83c8..85df3e4771e4 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -3,7 +3,7 @@ #define PARSE_CTX_H 1 // There are fixes that need to land upstream before we can use libbpf's headers, -// for now use our copy uncoditionally, since the data structures at this point +// for now use our copy unconditionally, since the data structures at this point // are exactly the same, no problem. //#ifdef HAVE_LIBBPF_SUPPORT //#include <bpf/hashmap.h> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 20effdff76ce..aa1e42518d37 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -127,7 +127,7 @@ static int __do_write_buf(struct feat_fd *ff, const void *buf, size_t size) return 0; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ int do_write(struct feat_fd *ff, const void *buf, size_t size) { if (!ff->buf) @@ -135,7 +135,7 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size) return __do_write_buf(ff, buf, size); } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) { u64 *p = (u64 *) set; @@ -154,7 +154,7 @@ static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) return 0; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ int write_padded(struct feat_fd *ff, const void *bf, size_t count, size_t count_aligned) { @@ -170,7 +170,7 @@ int write_padded(struct feat_fd *ff, const void *bf, #define string_size(str) \ (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_write_string(struct feat_fd *ff, const char *str) { u32 len, olen; @@ -266,7 +266,7 @@ static char *do_read_string(struct feat_fd *ff) return NULL; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) { unsigned long *set; @@ -2874,7 +2874,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) int err = -1; if (ff->ph->needs_swap) { - pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n"); + pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n"); return 0; } @@ -2942,7 +2942,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) int err = -1; if (ff->ph->needs_swap) { - pr_warning("interpreting btf from systems with endianity is not yet supported\n"); + pr_warning("interpreting btf from systems with endianness is not yet supported\n"); return 0; } @@ -3481,11 +3481,11 @@ static const size_t attr_pipe_abi_sizes[] = { }; /* - * In the legacy pipe format, there is an implicit assumption that endiannesss + * In the legacy pipe format, there is an implicit assumption that endianness * between host recording the samples, and host parsing the samples is the * same. This is not always the case given that the pipe output may always be * redirected into a file and analyzed on a different machine with possibly a - * different endianness and perf_event ABI revsions in the perf tool itself. + * different endianness and perf_event ABI revisions in the perf tool itself. */ static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) { diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index c82f5fc26af8..9299ee535518 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); + hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else @@ -289,13 +290,14 @@ static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat) + u64 weight, u64 ins_lat, u64 p_stage_cyc) { he_stat->period += period; he_stat->weight += weight; he_stat->nr_events += 1; he_stat->ins_lat += ins_lat; + he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->nr_events += src->nr_events; dest->weight += src->weight; dest->ins_lat += src->ins_lat; + dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -597,6 +600,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, u64 period = entry->stat.period; u64 weight = entry->stat.weight; u64 ins_lat = entry->stat.ins_lat; + u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -615,11 +619,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat); + he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat); + he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); /* * This mem info was allocated from sample__resolve_mem @@ -731,6 +735,7 @@ __hists__add_entry(struct hists *hists, .period = sample->period, .weight = sample->weight, .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3c537232294b..e2faa745c8d6 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,6 +75,7 @@ enum hist_column { HISTC_MEM_BLOCKED, HISTC_LOCAL_INS_LAT, HISTC_GLOBAL_INS_LAT, + HISTC_P_STAGE_CYC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f6e28ac231b7..8658d42ce57a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3569,7 +3569,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, /* * Since this thread will not be kept in any rbtree not in a * list, initialize its list node so that at thread__put() the - * current thread lifetime assuption is kept and we don't segfault + * current thread lifetime assumption is kept and we don't segfault * at list_del_init(). */ INIT_LIST_HEAD(&pt->unknown_thread->node); diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c index a217ecf0359d..6a6712635aa4 100644 --- a/tools/perf/util/levenshtein.c +++ b/tools/perf/util/levenshtein.c @@ -30,7 +30,7 @@ * * It does so by calculating the costs of the path ending in characters * i (in string1) and j (in string2), respectively, given that the last - * operation is a substition, a swap, a deletion, or an insertion. + * operation is a substitution, a swap, a deletion, or an insertion. * * This implementation allows the costs to be weighted: * diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 6b4e5a0892f8..c397be0c2e32 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -4,7 +4,7 @@ * generic one. * * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch - * name and the defination of this function is included directly from + * name and the definition of this function is included directly from * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function * is defined no matter what arch the host is. * diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c index 21c216c40a3b..b2b92d030aef 100644 --- a/tools/perf/util/libunwind/x86_32.c +++ b/tools/perf/util/libunwind/x86_32.c @@ -4,7 +4,7 @@ * generic one. * * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch - * name and the defination of this function is included directly from + * name and the definition of this function is included directly from * 'arch/x86/util/unwind-libunwind.c', to make sure that this function * is defined no matter what arch the host is. * diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index dbdffb6673fe..3ceaf7ef3301 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -471,7 +471,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, /* * This is an optional work. Even it fail we can continue our - * work. Needn't to check error return. + * work. Needn't check error return. */ llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b5c2d8be4144..3ff4936a15a4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -905,7 +905,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start maps__insert(&machine->kmaps, map); - /* Put the map here because maps__insert alread got it */ + /* Put the map here because maps__insert already got it */ map__put(map); out: /* put the dso here, corresponding to machine__findnew_module_dso */ @@ -1952,7 +1952,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event * maps because that is what the kernel just did. * * But when synthesizing, this should not be done. If we do, we end up - * with overlapping maps as we process the sythesized MMAP2 events that + * with overlapping maps as we process the synthesized MMAP2 events that * get delivered shortly thereafter. * * Use the FORK event misc flags in an internal way to signal this @@ -2038,8 +2038,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, static bool symbol__match_regex(struct symbol *sym, regex_t *regex) { if (!regexec(regex, sym->name, 0, NULL, 0)) - return 1; - return 0; + return true; + return false; } static void ip__resolve_ams(struct thread *thread, @@ -2518,7 +2518,7 @@ static bool has_stitched_lbr(struct thread *thread, /* * Check if there are identical LBRs between two samples. - * Identicall LBRs must have same from, to and flags values. Also, + * Identical LBRs must have same from, to and flags values. Also, * they have to be saved in the same LBR registers (same physical * index). * @@ -2588,7 +2588,7 @@ err: } /* - * Recolve LBR callstack chain sample + * Resolve LBR callstack chain sample * Return: * 1 on success get LBR callchain information * 0 no available LBR callchain information, should try fp diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 9f32825c98d8..d32f5b28c1fb 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -75,7 +75,7 @@ struct thread; /* map__for_each_symbol - iterate over the symbols in the given map * - * @map: the 'struct map *' in which symbols itereated + * @map: the 'struct map *' in which symbols are iterated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage * Note: caller must ensure map->dso is not NULL (map is loaded). @@ -86,7 +86,7 @@ struct thread; /* map__for_each_symbol_with_name - iterate over the symbols in the given map * that have the given name * - * @map: the 'struct map *' in which symbols itereated + * @map: the 'struct map *' in which symbols are iterated * @sym_name: the symbol name * @pos: the 'struct symbol *' to use as a loop cursor */ diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 755cef7e0625..63dd383b6ce2 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -81,7 +81,7 @@ struct c2c_stats { u32 rmt_dram; /* count of loads miss to remote DRAM */ u32 blk_data; /* count of loads blocked by data */ u32 blk_addr; /* count of loads blocked by address conflict */ - u32 nomap; /* count of load/stores with no phys adrs */ + u32 nomap; /* count of load/stores with no phys addrs */ u32 noparse; /* count of unparsable data sources */ }; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 26c990e32378..6acb44ad439b 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -181,7 +181,7 @@ static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) * @pctx: the parse context for the metric expression. * @metric_no_merge: don't attempt to share events for the metric with other * metrics. - * @has_constraint: is there a contraint on the group of events? In which case + * @has_constraint: is there a constraint on the group of events? In which case * the events won't be grouped. * @metric_events: out argument, null terminated array of evsel's associated * with the metric. diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index ed1b9392e624..026bbf416c48 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -9,7 +9,6 @@ struct evlist; struct evsel; -struct evlist; struct option; struct rblist; struct pmu_events_map; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c0c0fab22cb8..8123d218ad17 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -846,9 +846,9 @@ split_bpf_config_terms(struct list_head *evt_head_config, struct parse_events_term *term, *temp; /* - * Currectly, all possible user config term + * Currently, all possible user config term * belong to bpf object. parse_events__is_hardcoded_term() - * happends to be a good flag. + * happens to be a good flag. * * See parse_events_config_bpf() and * config_term_tracepoint(). @@ -898,7 +898,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, /* * Caller doesn't know anything about obj_head_config, - * so combine them together again before returnning. + * so combine them together again before returning. */ if (head_config) list_splice_tail(&obj_head_config, head_config); @@ -1185,10 +1185,10 @@ do { \ } /* - * Check term availbility after basic checking so + * Check term availability after basic checking so * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered. * - * If check availbility at the entry of this function, + * If check availability at the entry of this function, * user will see "'<sysfs term>' is not usable in 'perf stat'" * if an invalid config term is provided for legacy events * (for example, instructions/badterm/...), which is confusing. diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 46fd0f998484..4e52b94f1ac6 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1069,7 +1069,7 @@ int perf_pmu__format_type(struct list_head *formats, const char *name) /* * Sets value based on the format definition (format parameter) - * and unformated value (value parameter). + * and unformatted value (value parameter). */ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, bool zero) @@ -1408,7 +1408,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, } /* - * if no unit or scale foundin aliases, then + * if no unit or scale found in aliases, then * set defaults as for evsel * unit cannot left to NULL */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a9cff3a50ddf..a78c8d59a555 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3228,7 +3228,7 @@ errout: return err; } -/* Concatinate two arrays */ +/* Concatenate two arrays */ static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b) { void *ret; @@ -3258,7 +3258,7 @@ concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs, if (*ntevs + ntevs2 > probe_conf.max_probes) ret = -E2BIG; else { - /* Concatinate the array of probe_trace_event */ + /* Concatenate the array of probe_trace_event */ new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs), *tevs2, ntevs2 * sizeof(**tevs2)); if (!new_tevs) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1b118c9c86a6..866f2d514d72 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -164,7 +164,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) /* * Convert a location into trace_arg. * If tvar == NULL, this just checks variable can be converted. - * If fentry == true and vr_die is a parameter, do huristic search + * If fentry == true and vr_die is a parameter, do heuristic search * for the location fuzzed by function entry mcount. */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, @@ -498,7 +498,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, " nor array.\n", varname); return -EINVAL; } - /* While prcessing unnamed field, we don't care about this */ + /* While processing unnamed field, we don't care about this */ if (field->ref && dwarf_diename(vr_die)) { pr_err("Semantic error: %s must be referred by '.'\n", field->name); @@ -1832,7 +1832,7 @@ static int line_range_walk_cb(const char *fname, int lineno, (lf->lno_s > lineno || lf->lno_e < lineno)) return 0; - /* Make sure this line can be reversable */ + /* Make sure this line can be reversible */ if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0 && (lineno != __lineno || strcmp(fname, __fname))) return 0; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 078a71773565..8130b56aa04b 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -45,7 +45,7 @@ * the data portion is mmap()'ed. * * To sort the queues in chronological order, all queue access is controlled - * by the auxtrace_heap. This is basicly a stack, each stack element has two + * by the auxtrace_heap. This is basically a stack, each stack element has two * entries, the queue number and a time stamp. However the stack is sorted by * the time stamps. The highest time stamp is at the bottom the lowest * (nearest) time stamp is at the top. That sort order is maintained at all @@ -65,11 +65,11 @@ * stamp of the last processed entry of the auxtrace_buffer replaces the * current auxtrace_heap top. * - * 3. Auxtrace_queues might run of out data and are feeded by the + * 3. Auxtrace_queues might run of out data and are fed by the * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). * * Event Generation - * Each sampling-data entry in the auxilary trace data generates a perf sample. + * Each sampling-data entry in the auxiliary trace data generates a perf sample. * This sample is filled * with data from the auxtrace such as PID/TID, instruction address, CPU state, * etc. This sample is processed with perf_session__deliver_synth_event() to @@ -575,7 +575,7 @@ static unsigned long long get_trailer_time(const unsigned char *buf) * pointer to the queue, the second parameter is the time stamp. This * is the time stamp: * - of the event that triggered this processing. - * - or the time stamp when the last proccesing of this queue stopped. + * - or the time stamp when the last processing of this queue stopped. * In this case it stopped at a 4KB page boundary and record the * position on where to continue processing on the next invocation * (see buffer->use_data and buffer->use_size). @@ -640,7 +640,7 @@ static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) goto out; } - pos += dsdes; /* Skip diagnositic entry */ + pos += dsdes; /* Skip diagnostic entry */ /* Check for trailer entry */ if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c83c2c6564e0..4e4aa4c97ac5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1531,7 +1531,7 @@ static void set_table_handlers(struct tables *tables) * Attempt to use the call path root from the call return * processor, if the call return processor is in use. Otherwise, * we allocate a new call path root. This prevents exporting - * duplicate call path ids when both are in use simultaniously. + * duplicate call path ids when both are in use simultaneously. */ if (tables->dbe.crp) tables->dbe.cpr = tables->dbe.crp->cpr; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 859832a82496..eba3769be3f1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1069,7 +1069,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) * in "to" register. * For example, there is a call stack * "A"->"B"->"C"->"D". - * The LBR registers will recorde like + * The LBR registers will be recorded like * "C"->"D", "B"->"C", "A"->"B". * So only the first "to" register and all "from" * registers are needed to construct the whole stack. @@ -1302,8 +1302,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { printf("... weight: %" PRIu64 "", sample->weight); - if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { printf(",0x%"PRIx16"", sample->ins_lat); + printf(",0x%"PRIx16"", sample->p_stage_cyc); + } printf("\n"); } @@ -1584,7 +1586,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->event_update(tool, event, &session->evlist); case PERF_RECORD_HEADER_EVENT_TYPE: /* - * Depreceated, but we need to handle it for sake + * Deprecated, but we need to handle it for sake * of old data files create in pipe mode. */ return 0; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 552b590485bf..88ce47f2547e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -25,6 +25,7 @@ #include <traceevent/event-parse.h> #include "mem-events.h" #include "annotate.h" +#include "event.h" #include "time-utils.h" #include "cgroup.h" #include "machine.h" @@ -36,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -45,6 +46,8 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; +const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; +const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -1408,6 +1411,25 @@ struct sort_entry sort_global_ins_lat = { .se_width_idx = HISTC_GLOBAL_INS_LAT, }; +static int64_t +sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->stat.p_stage_cyc - right->stat.p_stage_cyc; +} + +static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); +} + +struct sort_entry sort_p_stage_cyc = { + .se_header = "Pipeline Stage Cycle", + .se_cmp = sort__global_p_stage_cyc_cmp, + .se_snprintf = hist_entry__p_stage_cyc_snprintf, + .se_width_idx = HISTC_P_STAGE_CYC, +}; + struct sort_entry sort_mem_daddr_sym = { .se_header = "Data Symbol", .se_cmp = sort__daddr_cmp, @@ -1816,6 +1838,21 @@ struct sort_dimension { int taken; }; +int __weak arch_support_sort_key(const char *sort_key __maybe_unused) +{ + return 0; +} + +const char * __weak arch_perf_header_entry(const char *se_header) +{ + return se_header; +} + +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) +{ + sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); +} + #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } static struct sort_dimension common_sort_dimensions[] = { @@ -1841,6 +1878,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), + DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), }; #undef DIM @@ -2739,7 +2777,20 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct evlist *evlist, int level) { - unsigned int i; + unsigned int i, j; + + /* + * Check to see if there are any arch specific + * sort dimensions not applicable for the current + * architecture. If so, Skip that sort key since + * we don't want to display it in the output fields. + */ + for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) { + if (!strcmp(arch_specific_sort_keys[j], tok) && + !arch_support_sort_key(tok)) { + return 0; + } + } for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; @@ -2747,6 +2798,11 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (strncasecmp(tok, sd->name, strlen(tok))) continue; + for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { + if (!strcmp(dynamic_headers[j], sd->name)) + sort_dimension_add_dynamic_header(sd); + } + if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 63f67a3f3630..87a092645aa7 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -51,6 +51,7 @@ struct he_stat { u64 period_guest_us; u64 weight; u64 ins_lat; + u64 p_stage_cyc; u32 nr_events; }; @@ -234,6 +235,7 @@ enum sort_type { SORT_CODE_PAGE_SIZE, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, + SORT_PIPELINE_STAGE_CYC, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7f09cdaf5b60..d3137bc17065 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -439,6 +439,12 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int if (counter->cgrp) os.nfields++; } + + if (!config->no_csv_summary && config->csv_output && + config->summary && !config->interval) { + fprintf(config->output, "%16s%s", "summary", config->csv_sep); + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { pm(config, &os, NULL, "", "", 0); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 6ccf21a72f06..3f800e71126f 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,6 +9,7 @@ #include "expr.h" #include "metricgroup.h" #include "cgroup.h" +#include "units.h" #include <linux/zalloc.h> /* @@ -1270,18 +1271,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { - char unit = 'M'; - char unit_buf[10]; + char unit = ' '; + char unit_buf[10] = "/sec"; total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); - if (total) - ratio = 1000.0 * avg / total; - if (ratio < 0.001) { - ratio *= 1000; - unit = 'K'; - } - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + ratio = convert_unit_double(1000000000.0 * avg / total, &unit); + + if (unit != ' ') + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { print_smi_cost(config, cpu, out, st, &rsd); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c400f8dde017..2db46b9bebd0 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -76,8 +76,7 @@ double rel_stddev_stats(double stddev, double avg) return pct; } -bool __perf_evsel_stat__is(struct evsel *evsel, - enum perf_stat_evsel_id id) +bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id) { struct perf_stat_evsel *ps = evsel->stats; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index d85c292148bb..48e6a06233fa 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,6 +128,7 @@ struct perf_stat_config { bool all_user; bool percore_show_thread; bool summary; + bool no_csv_summary; bool metric_no_group; bool metric_no_merge; bool stop_read_counter; @@ -160,6 +161,7 @@ struct perf_stat_config { }; void perf_stat__set_big_num(int set); +void perf_stat__set_no_csv_summary(int set); void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); @@ -187,11 +189,10 @@ struct perf_aggr_thread_value { u64 ena; }; -bool __perf_evsel_stat__is(struct evsel *evsel, - enum perf_stat_evsel_id id); +bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); #define perf_stat_evsel__is(evsel, id) \ - __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) + __perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id) extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index ea94d8628980..be94d7046fa0 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -12,7 +12,7 @@ * build complex strings/buffers whose final size isn't easily known. * * It is NOT legal to copy the ->buf pointer away. - * `strbuf_detach' is the operation that detachs a buffer from its shell + * `strbuf_detach' is the operation that detaches a buffer from its shell * while keeping the shell valid wrt its invariants. * * 2. the ->buf member is a byte array that has at least ->len + 1 bytes diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h index e0c25a40f796..c05aca9ca582 100644 --- a/tools/perf/util/strfilter.h +++ b/tools/perf/util/strfilter.h @@ -8,8 +8,8 @@ /* A node of string filter */ struct strfilter_node { - struct strfilter_node *l; /* Tree left branche (for &,|) */ - struct strfilter_node *r; /* Tree right branche (for !,&,|) */ + struct strfilter_node *l; /* Tree left branch (for &,|) */ + struct strfilter_node *r; /* Tree right branch (for !,&,|) */ const char *p; /* Operator or rule */ }; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6dff843fd883..4c56aa837434 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1058,7 +1058,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, curr_dso->symtab_type = dso->symtab_type; maps__insert(kmaps, curr_map); /* - * Add it before we drop the referece to curr_map, i.e. while + * Add it before we drop the reference to curr_map, i.e. while * we still are sure to have a reference to this DSO via * *curr_map->dso. */ diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 35c936ce33ef..2664fb65e47a 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -68,7 +68,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); - fprintf(fp, "%s\n", pos->sym.name); + ret += fprintf(fp, "%s\n", pos->sym.name); } return ret; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index dff178103ce5..35aa0c0f7cd9 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1211,7 +1211,7 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) *max = 0; for (i = 0; i < map->nr; i++) { - /* bit possition of the cpu is + 1 */ + /* bit position of the cpu is + 1 */ int bit = map->map[i] + 1; if (bit > *max) @@ -1237,7 +1237,7 @@ void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int * mask = size of 'struct perf_record_record_cpu_map' + * maximum cpu bit converted to size of longs * - * and finaly + the size of 'struct perf_record_cpu_map_data'. + * and finally + the size of 'struct perf_record_cpu_map_data'. */ size_cpus = cpus_size(map); size_mask = mask_size(map, max); diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 03bd99d3be16..a2e906858891 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -34,6 +34,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_32; #include <asm/syscalls.c> const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; static const char **syscalltbl_native = syscalltbl_arm64; +#elif defined(__mips__) +#include <asm/syscalls_n64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_mips_n64; #endif struct syscall { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index f132c6c2eef8..1bce3eb28ef2 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -16,6 +16,8 @@ struct target { bool uses_mmap; bool default_per_cpu; bool per_thread; + bool use_bpf; + const char *attr_map; }; enum target_errno { @@ -66,7 +68,7 @@ static inline bool target__has_cpu(struct target *target) static inline bool target__has_bpf(struct target *target) { - return target->bpf_str; + return target->bpf_str || target->use_bpf; } static inline bool target__none(struct target *target) diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 3bc47a42af8e..b3cd09beb62f 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -16,7 +16,6 @@ struct comm; struct ip_callchain; struct symbol; struct dso; -struct comm; struct perf_sample; struct addr_location; struct call_path; diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c index a46762aec4c9..32c39cfe209b 100644 --- a/tools/perf/util/units.c +++ b/tools/perf/util/units.c @@ -33,28 +33,35 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } -unsigned long convert_unit(unsigned long value, char *unit) +double convert_unit_double(double value, char *unit) { *unit = ' '; - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'K'; } - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'M'; } - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'G'; } return value; } +unsigned long convert_unit(unsigned long value, char *unit) +{ + double v = convert_unit_double((double)value, unit); + + return (unsigned long)v; +} + int unit_number__scnprintf(char *buf, size_t size, u64 n) { char unit[4] = "BKMG"; diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h index 99263b6a23f7..ea43e74e3240 100644 --- a/tools/perf/util/units.h +++ b/tools/perf/util/units.h @@ -12,6 +12,7 @@ struct parse_tag { unsigned long parse_tag_value(const char *str, struct parse_tag *tags); +double convert_unit_double(double value, char *unit); unsigned long convert_unit(unsigned long value, char *unit); int unit_number__scnprintf(char *buf, size_t size, u64 n); diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9aededc0bc06..71a353349181 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -82,7 +82,7 @@ UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, #define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ #define DW_EH_PE_aligned 0x50 /* aligned pointer */ -/* Flags intentionaly not handled, since they're not needed: +/* Flags intentionally not handled, since they're not needed: * #define DW_EH_PE_indirect 0x80 * #define DW_EH_PE_uleb128 0x01 * #define DW_EH_PE_udata2 0x02 |