diff options
Diffstat (limited to 'tools/perf')
175 files changed, 3424 insertions, 5913 deletions
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index adc5a7e44b98..31824d5269cc 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -295,7 +295,10 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b docbook -d manpage \ - $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \ + -aperf_date=$(shell git log -1 --pretty="format:%cd" \ + --date=short $<) \ + -o $@+ $< && \ mv $@+ $@ XSLT = docbook.xsl diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 2cf2d9e9d0da..fd9241a1b987 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -1,991 +1 @@ -Intel Processor Trace -===================== - -Overview -======== - -Intel Processor Trace (Intel PT) is an extension of Intel Architecture that -collects information about software execution such as control flow, execution -modes and timings and formats it into highly compressed binary packets. -Technical details are documented in the Intel 64 and IA-32 Architectures -Software Developer Manuals, Chapter 36 Intel Processor Trace. - -Intel PT is first supported in Intel Core M and 5th generation Intel Core -processors that are based on the Intel micro-architecture code name Broadwell. - -Trace data is collected by 'perf record' and stored within the perf.data file. -See below for options to 'perf record'. - -Trace data must be 'decoded' which involves walking the object code and matching -the trace data packets. For example a TNT packet only tells whether a -conditional branch was taken or not taken, so to make use of that packet the -decoder must know precisely which instruction was being executed. - -Decoding is done on-the-fly. The decoder outputs samples in the same format as -samples output by perf hardware events, for example as though the "instructions" -or "branches" events had been recorded. Presently 3 tools support this: -'perf script', 'perf report' and 'perf inject'. See below for more information -on using those tools. - -The main distinguishing feature of Intel PT is that the decoder can determine -the exact flow of software execution. Intel PT can be used to understand why -and how did software get to a certain point, or behave a certain way. The -software does not have to be recompiled, so Intel PT works with debug or release -builds, however the executed images are needed - which makes use in JIT-compiled -environments, or with self-modified code, a challenge. Also symbols need to be -provided to make sense of addresses. - -A limitation of Intel PT is that it produces huge amounts of trace data -(hundreds of megabytes per second per core) which takes a long time to decode, -for example two or three orders of magnitude longer than it took to collect. -Another limitation is the performance impact of tracing, something that will -vary depending on the use-case and architecture. - - -Quickstart -========== - -It is important to start small. That is because it is easy to capture vastly -more data than can possibly be processed. - -The simplest thing to do with Intel PT is userspace profiling of small programs. -Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: - - perf record -e intel_pt//u ls - -And profiled with 'perf report' e.g. - - perf report - -To also trace kernel space presents a problem, namely kernel self-modifying -code. A fairly good kernel image is available in /proc/kcore but to get an -accurate image a copy of /proc/kcore needs to be made under the same conditions -as the data capture. A script perf-with-kcore can do that, but beware that the -script makes use of 'sudo' to copy /proc/kcore. If you have perf installed -locally from the source tree you can do: - - ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls - -which will create a directory named 'pt_ls' and put the perf.data file and -copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use -'perf report' becomes: - - ~/libexec/perf-core/perf-with-kcore report pt_ls - -Because samples are synthesized after-the-fact, the sampling period can be -selected for reporting. e.g. sample every microsecond - - ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge - -See the sections below for more information about the --itrace option. - -Beware the smaller the period, the more samples that are produced, and the -longer it takes to process them. - -Also note that the coarseness of Intel PT timing information will start to -distort the statistical value of the sampling as the sampling period becomes -smaller. - -To represent software control flow, "branches" samples are produced. By default -a branch sample is synthesized for every single branch. To get an idea what -data is available you can use the 'perf script' tool with all itrace sampling -options, which will list all the samples. - - perf record -e intel_pt//u ls - perf script --itrace=ibxwpe - -An interesting field that is not printed by default is 'flags' which can be -displayed as follows: - - perf script --itrace=ibxwpe -F+flags - -The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, -system, asynchronous, interrupt, transaction abort, trace begin, trace end, and -in transaction, respectively. - -Another interesting field that is not printed by default is 'ipc' which can be -displayed as follows: - - perf script --itrace=be -F+ipc - -There are two ways that instructions-per-cycle (IPC) can be calculated depending -on the recording. - -If the 'cyc' config term (see config terms section below) was used, then IPC is -calculated using the cycle count from CYC packets, otherwise MTC packets are -used - refer to the 'mtc' config term. When MTC is used, however, the values -are less accurate because the timing is less accurate. - -Because Intel PT does not update the cycle count on every branch or instruction, -the values will often be zero. When there are values, they will be the number -of instructions and number of cycles since the last update, and thus represent -the average IPC since the last IPC for that event type. Note IPC for "branches" -events is calculated separately from IPC for "instructions" events. - -Also note that the IPC instruction count may or may not include the current -instruction. If the cycle count is associated with an asynchronous branch -(e.g. page fault or interrupt), then the instruction count does not include the -current instruction, otherwise it does. That is consistent with whether or not -that instruction has retired when the cycle count is updated. - -Another note, in the case of "branches" events, non-taken branches are not -presently sampled, so IPC values for them do not appear e.g. a CYC packet with a -TNT packet that starts with a non-taken branch. To see every possible IPC -value, "instructions" events can be used e.g. --itrace=i0ns - -While it is possible to create scripts to analyze the data, an alternative -approach is available to export the data to a sqlite or postgresql database. -Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, -and to script exported-sql-viewer.py for an example of using the database. - -There is also script intel-pt-events.py which provides an example of how to -unpack the raw data for power events and PTWRITE. - -As mentioned above, it is easy to capture too much data. One way to limit the -data captured is to use 'snapshot' mode which is explained further below. -Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. - -Another problem that will be experienced is decoder errors. They can be caused -by inability to access the executed image, self-modified or JIT-ed code, or the -inability to match side-band information (such as context switches and mmaps) -which results in the decoder not knowing what code was executed. - -There is also the problem of perf not being able to copy the data fast enough, -resulting in data lost because the buffer was full. See 'Buffer handling' below -for more details. - - -perf record -=========== - -new event ---------- - -The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are -selected by providing the PMU name followed by the "config" separated by slashes. -An enhancement has been made to allow default "config" e.g. the option - - -e intel_pt// - -will use a default config value. Currently that is the same as - - -e intel_pt/tsc,noretcomp=0/ - -which is the same as - - -e intel_pt/tsc=1,noretcomp=0/ - -Note there are now new config terms - see section 'config terms' further below. - -The config terms are listed in /sys/devices/intel_pt/format. They are bit -fields within the config member of the struct perf_event_attr which is -passed to the kernel by the perf_event_open system call. They correspond to bit -fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: - - $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* - /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 - /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 - /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 - /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 - /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 - /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 - /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 - -Note that the default config must be overridden for each term i.e. - - -e intel_pt/noretcomp=0/ - -is the same as: - - -e intel_pt/tsc=1,noretcomp=0/ - -So, to disable TSC packets use: - - -e intel_pt/tsc=0/ - -It is also possible to specify the config value explicitly: - - -e intel_pt/config=0x400/ - -Note that, as with all events, the event is suffixed with event modifiers: - - u userspace - k kernel - h hypervisor - G guest - H host - p precise ip - -'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. -'p' is also not relevant to Intel PT. So only options 'u' and 'k' are -meaningful for Intel PT. - -perf_event_attr is displayed if the -vv option is used e.g. - - ------------------------------------------------------------ - perf_event_attr: - type 6 - size 112 - config 0x400 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - enable_on_exec 1 - sample_id_all 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - - -config terms ------------- - -The June 2015 version of Intel 64 and IA-32 Architectures Software Developer -Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. -Some of the features are reflect in new config terms. All the config terms are -described below. - -tsc Always supported. Produces TSC timestamp packets to provide - timing information. In some cases it is possible to decode - without timing information, for example a per-thread context - that does not overlap executable memory maps. - - The default config selects tsc (i.e. tsc=1). - -noretcomp Always supported. Disables "return compression" so a TIP packet - is produced when a function returns. Causes more packets to be - produced but might make decoding more reliable. - - The default config does not select noretcomp (i.e. noretcomp=0). - -psb_period Allows the frequency of PSB packets to be specified. - - The PSB packet is a synchronization packet that provides a - starting point for decoding or recovery from errors. - - Support for psb_period is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc - - which contains "1" if the feature is supported and "0" - otherwise. - - Valid values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_periods - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The psb_period value is converted to the approximate number of - trace bytes between PSB packets as: - - 2 ^ (value + 11) - - e.g. value 3 means 16KiB bytes between PSBs - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/psb_period=15/u uname - Invalid psb_period for intel_pt. Valid values are: 0-5 - - If MTC packets are selected, the default config selects a value - of 3 (i.e. psb_period=3) or the nearest lower value that is - supported (0 is always supported). Otherwise the default is 0. - - If decoding is expected to be reliable and the buffer is large - then a large PSB period can be used. - - Because a TSC packet is produced with PSB, the PSB period can - also affect the granularity to timing information in the absence - of MTC or CYC. - -mtc Produces MTC timing packets. - - MTC packets provide finer grain timestamp information than TSC - packets. MTC packets record time using the hardware crystal - clock (CTC) which is related to TSC packets using a TMA packet. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/mtc - - which contains "1" if the feature is supported and - "0" otherwise. - - The frequency of MTC packets can also be specified - see - mtc_period below. - -mtc_period Specifies how frequently MTC packets are produced - see mtc - above for how to determine if MTC packets are supported. - - Valid values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/mtc_periods - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The mtc_period value is converted to the MTC frequency as: - - CTC-frequency / (2 ^ value) - - e.g. value 3 means one eighth of CTC-frequency - - Where CTC is the hardware crystal clock, the frequency of which - can be related to TSC via values provided in cpuid leaf 0x15. - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/mtc_period=15/u uname - Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 - - The default value is 3 or the nearest lower value - that is supported (0 is always supported). - -cyc Produces CYC timing packets. - - CYC packets provide even finer grain timestamp information than - MTC and TSC packets. A CYC packet contains the number of CPU - cycles since the last CYC packet. Unlike MTC and TSC packets, - CYC packets are only sent when another packet is also sent. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/psb_cyc - - which contains "1" if the feature is supported and - "0" otherwise. - - The number of CYC packets produced can be reduced by specifying - a threshold - see cyc_thresh below. - -cyc_thresh Specifies how frequently CYC packets are produced - see cyc - above for how to determine if CYC packets are supported. - - Valid cyc_thresh values are given by: - - /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds - - which contains a hexadecimal value, the bits of which represent - valid values e.g. bit 2 set means value 2 is valid. - - The cyc_thresh value represents the minimum number of CPU cycles - that must have passed before a CYC packet can be sent. The - number of CPU cycles is: - - 2 ^ (value - 1) - - e.g. value 4 means 8 CPU cycles must pass before a CYC packet - can be sent. Note a CYC packet is still only sent when another - packet is sent, not at, e.g. every 8 CPU cycles. - - If an invalid value is entered, the error message - will give a list of valid values e.g. - - $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname - Invalid cyc_thresh for intel_pt. Valid values are: 0-12 - - CYC packets are not requested by default. - -pt Specifies pass-through which enables the 'branch' config term. - - The default config selects 'pt' if it is available, so a user will - never need to specify this term. - -branch Enable branch tracing. Branch tracing is enabled by default so to - disable branch tracing use 'branch=0'. - - The default config selects 'branch' if it is available. - -ptw Enable PTWRITE packets which are produced when a ptwrite instruction - is executed. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/ptwrite - - which contains "1" if the feature is supported and - "0" otherwise. - -fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet - provides the address of the ptwrite instruction. In the absence of - fup_on_ptw, the decoder will use the address of the previous branch - if branch tracing is enabled, otherwise the address will be zero. - Note that fup_on_ptw will work even when branch tracing is disabled. - -pwr_evt Enable power events. The power events provide information about - changes to the CPU C-state. - - Support for this feature is indicated by: - - /sys/bus/event_source/devices/intel_pt/caps/power_event_trace - - which contains "1" if the feature is supported and - "0" otherwise. - - -AUX area sampling option ------------------------- - -To select Intel PT "sampling" the AUX area sampling option can be used: - - --aux-sample - -Optionally it can be followed by the sample size in bytes e.g. - - --aux-sample=8192 - -In addition, the Intel PT event to sample must be defined e.g. - - -e intel_pt//u - -Samples on other events will be created containing Intel PT data e.g. the -following will create Intel PT samples on the branch-misses event, note the -events must be grouped using {}: - - perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' - -An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to -events. In this case, the grouping is implied e.g. - - perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u - -is the same as: - - perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' - -but allows for also using an address filter e.g.: - - perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls - -It is important to select a sample size that is big enough to contain at least -one PSB packet. If not a warning will be displayed: - - Intel PT sample size (%zu) may be too small for PSB period (%zu) - -The calculation used for that is: if sample_size <= psb_period + 256 display the -warning. When sampling is used, psb_period defaults to 0 (2KiB). - -The default sample size is 4KiB. - -The sample size is passed in aux_sample_size in struct perf_event_attr. The -sample size is limited by the maximum event size which is 64KiB. It is -difficult to know how big the event might be without the trace sample attached, -but the tool validates that the sample size is not greater than 60KiB. - - -new snapshot option -------------------- - -The difference between full trace and snapshot from the kernel's perspective is -that in full trace we don't overwrite trace data that the user hasn't collected -yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let -the trace run and overwrite older data in the buffer so that whenever something -interesting happens, we can stop it and grab a snapshot of what was going on -around that interesting moment. - -To select snapshot mode a new option has been added: - - -S - -Optionally it can be followed by the snapshot size e.g. - - -S0x100000 - -The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size -nor snapshot size is specified, then the default is 4MiB for privileged users -(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. -If an unprivileged user does not specify mmap pages, the mmap pages will be -reduced as described in the 'new auxtrace mmap size option' section below. - -The snapshot size is displayed if the option -vv is used e.g. - - Intel PT snapshot size: %zu - - -new auxtrace mmap size option ---------------------------- - -Intel PT buffer size is specified by an addition to the -m option e.g. - - -m,16 - -selects a buffer size of 16 pages i.e. 64KiB. - -Note that the existing functionality of -m is unchanged. The auxtrace mmap size -is specified by the optional addition of a comma and the value. - -The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users -(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. -If an unprivileged user does not specify mmap pages, the mmap pages will be -reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the -user is likely to get an error as they exceed their mlock limit (Max locked -memory as shown in /proc/self/limits). Note that perf does not count the first -512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu -against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus -their mlock limit (which defaults to 64KiB but is not multiplied by the number -of cpus). - -In full-trace mode, powers of two are allowed for buffer size, with a minimum -size of 2 pages. In snapshot mode or sampling mode, it is the same but the -minimum size is 1 page. - -The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. - - mmap length 528384 - auxtrace mmap length 4198400 - - -Intel PT modes of operation ---------------------------- - -Intel PT can be used in 2 modes: - full-trace mode - sample mode - snapshot mode - -Full-trace mode traces continuously e.g. - - perf record -e intel_pt//u uname - -Sample mode attaches a Intel PT sample to other events e.g. - - perf record --aux-sample -e intel_pt//u -e branch-misses:u - -Snapshot mode captures the available data when a signal is sent e.g. - - perf record -v -e intel_pt//u -S ./loopy 1000000000 & - [1] 11435 - kill -USR2 11435 - Recording AUX area tracing snapshot - -Note that the signal sent is SIGUSR2. -Note that "Recording AUX area tracing snapshot" is displayed because the -v -option is used. - -The 2 modes cannot be used together. - - -Buffer handling ---------------- - -There may be buffer limitations (i.e. single ToPa entry) which means that actual -buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to -provide other sizes, and in particular an arbitrarily large size, multiple -buffers are logically concatenated. However an interrupt must be used to switch -between buffers. That has two potential problems: - a) the interrupt may not be handled in time so that the current buffer - becomes full and some trace data is lost. - b) the interrupts may slow the system and affect the performance - results. - -If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event -which the tools report as an error. - -In full-trace mode, the driver waits for data to be copied out before allowing -the (logical) buffer to wrap-around. If data is not copied out quickly enough, -again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to -wait, the intel_pt event gets disabled. Because it is difficult to know when -that happens, perf tools always re-enable the intel_pt event after copying out -data. - - -Intel PT and build ids ----------------------- - -By default "perf record" post-processes the event stream to find all build ids -for executables for all addresses sampled. Deliberately, Intel PT is not -decoded for that purpose (it would take too long). Instead the build ids for -all executables encountered (due to mmap, comm or task events) are included -in the perf.data file. - -To see buildids included in the perf.data file use the command: - - perf buildid-list - -If the perf.data file contains Intel PT data, that is the same as: - - perf buildid-list --with-hits - - -Snapshot mode and event disabling ---------------------------------- - -In order to make a snapshot, the intel_pt event is disabled using an IOCTL, -namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the -collection of side-band information. In order to prevent that, a dummy -software event has been introduced that permits tracking events (like mmaps) to -continue to be recorded while intel_pt is disabled. That is important to ensure -there is complete side-band information to allow the decoding of subsequent -snapshots. - -A test has been created for that. To find the test: - - perf test list - ... - 23: Test using a dummy software event to keep tracking - -To run the test: - - perf test 23 - 23: Test using a dummy software event to keep tracking : Ok - - -perf record modes (nothing new here) ------------------------------------- - -perf record essentially operates in one of three modes: - per thread - per cpu - workload only - -"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a -workload). -"per cpu" is selected by -C or -a. -"workload only" mode is selected by not using the other options but providing a -command to run (i.e. the workload). - -In per-thread mode an exact list of threads is traced. There is no inheritance. -Each thread has its own event buffer. - -In per-cpu mode all processes (or processes from the selected cgroup i.e. -G -option, or processes selected with -p or -u) are traced. Each cpu has its own -buffer. Inheritance is allowed. - -In workload-only mode, the workload is traced but with per-cpu buffers. -Inheritance is allowed. Note that you can now trace a workload in per-thread -mode by using the --per-thread option. - - -Privileged vs non-privileged users ----------------------------------- - -Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users -have memory limits imposed upon them. That affects what buffer sizes they can -have as outlined above. - -The v4.2 kernel introduced support for a context switch metadata event, -PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes -are scheduled out and in, just not by whom, which is left for the -PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, -which in turn requires CAP_SYS_ADMIN. - -Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context -switches") commit, that introduces these metadata events for further info. - -When working with kernels < v4.2, the following considerations must be taken, -as the sched:sched_switch tracepoints will be used to receive such information: - -Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are -not permitted to use tracepoints which means there is insufficient side-band -information to decode Intel PT in per-cpu mode, and potentially workload-only -mode too if the workload creates new processes. - -Note also, that to use tracepoints, read-access to debugfs is required. So if -debugfs is not mounted or the user does not have read-access, it will again not -be possible to decode Intel PT in per-cpu mode. - - -sched_switch tracepoint ------------------------ - -The sched_switch tracepoint is used to provide side-band data for Intel PT -decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't -available. - -The sched_switch events are automatically added. e.g. the second event shown -below: - - $ perf record -vv -e intel_pt//u uname - ------------------------------------------------------------ - perf_event_attr: - type 6 - size 112 - config 0x400 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - enable_on_exec 1 - sample_id_all 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - perf_event_attr: - type 2 - size 112 - config 0x108 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER - read_format ID - inherit 1 - sample_id_all 1 - exclude_guest 1 - ------------------------------------------------------------ - sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 - ------------------------------------------------------------ - perf_event_attr: - type 1 - size 112 - config 0x9 - { sample_period, sample_freq } 1 - sample_type IP|TID|TIME|IDENTIFIER - read_format ID - disabled 1 - inherit 1 - exclude_kernel 1 - exclude_hv 1 - mmap 1 - comm 1 - enable_on_exec 1 - task 1 - sample_id_all 1 - mmap2 1 - comm_exec 1 - ------------------------------------------------------------ - sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 - sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 - mmap size 528384B - AUX area mmap length 4194304 - perf event ring buffer mmapped per cpu - Synthesizing auxtrace information - Linux - [ perf record: Woken up 1 times to write data ] - [ perf record: Captured and wrote 0.042 MB perf.data ] - -Note, the sched_switch event is only added if the user is permitted to use it -and only in per-cpu mode. - -Note also, the sched_switch event is only added if TSC packets are requested. -That is because, in the absence of timing information, the sched_switch events -cannot be matched against the Intel PT trace. - - -perf script -=========== - -By default, perf script will decode trace data found in the perf.data file. -This can be further controlled by new option --itrace. - - -New --itrace option -------------------- - -Having no option is the same as - - --itrace - -which, in turn, is the same as - - --itrace=cepwx - -The letters are: - - i synthesize "instructions" events - b synthesize "branches" events - x synthesize "transactions" events - w synthesize "ptwrite" events - p synthesize "power" events - c synthesize branches events (calls only) - r synthesize branches events (returns only) - e synthesize tracing error events - d create a debug log - g synthesize a call chain (use with i or x) - l synthesize last branch entries (use with i or x) - s skip initial number of events - -"Instructions" events look like they were recorded by "perf record -e -instructions". - -"Branches" events look like they were recorded by "perf record -e branches". "c" -and "r" can be combined to get calls and returns. - -"Transactions" events correspond to the start or end of transactions. The -'flags' field can be used in perf script to determine whether the event is a -tranasaction start, commit or abort. - -Note that "instructions", "branches" and "transactions" events depend on code -flow packets which can be disabled by using the config term "branch=0". Refer -to the config terms section above. - -"ptwrite" events record the payload of the ptwrite instruction and whether -"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are -recorded only if the "ptw" config term was used. Refer to the config terms -section above. perf script "synth" field displays "ptwrite" information like -this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was -used. - -"Power" events correspond to power event packets and CBR (core-to-bus ratio) -packets. While CBR packets are always recorded when tracing is enabled, power -event packets are recorded only if the "pwr_evt" config term was used. Refer to -the config terms section above. The power events record information about -C-state changes, whereas CBR is indicative of CPU frequency. perf script -"event,synth" fields display information like this: - cbr: cbr: 22 freq: 2189 MHz (200%) - mwait: hints: 0x60 extensions: 0x1 - pwre: hw: 0 cstate: 2 sub-cstate: 0 - exstop: ip: 1 - pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 -Where: - "cbr" includes the frequency and the percentage of maximum non-turbo - "mwait" shows mwait hints and extensions - "pwre" shows C-state transitions (to a C-state deeper than C0) and - whether initiated by hardware - "exstop" indicates execution stopped and whether the IP was recorded - exactly, - "pwrx" indicates return to C0 -For more details refer to the Intel 64 and IA-32 Architectures Software -Developer Manuals. - -Error events show where the decoder lost the trace. Error events -are quite important. Users must know if what they are seeing is a complete -picture or not. - -The "d" option will cause the creation of a file "intel_pt.log" containing all -decoded packets and instructions. Note that this option slows down the decoder -and that the resulting file may be very large. - -In addition, the period of the "instructions" event can be specified. e.g. - - --itrace=i10us - -sets the period to 10us i.e. one instruction sample is synthesized for each 10 -microseconds of trace. Alternatives to "us" are "ms" (milliseconds), -"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). - -"ms", "us" and "ns" are converted to TSC ticks. - -The timing information included with Intel PT does not give the time of every -instruction. Consequently, for the purpose of sampling, the decoder estimates -the time since the last timing packet based on 1 tick per instruction. The time -on the sample is *not* adjusted and reflects the last known value of TSC. - -For Intel PT, the default period is 100us. - -Setting it to a zero period means "as often as possible". - -In the case of Intel PT that is the same as a period of 1 and a unit of -'instructions' (i.e. --itrace=i1i). - -Also the call chain size (default 16, max. 1024) for instructions or -transactions events can be specified. e.g. - - --itrace=ig32 - --itrace=xg32 - -Also the number of last branch entries (default 64, max. 1024) for instructions or -transactions events can be specified. e.g. - - --itrace=il10 - --itrace=xl10 - -Note that last branch entries are cleared for each sample, so there is no overlap -from one sample to the next. - -To disable trace decoding entirely, use the option --no-itrace. - -It is also possible to skip events generated (instructions, branches, transactions) -at the beginning. This is useful to ignore initialization code. - - --itrace=i0nss1000000 - -skips the first million instructions. - -dump option ------------ - -perf script has an option (-D) to "dump" the events i.e. display the binary -data. - -When -D is used, Intel PT packets are displayed. The packet decoder does not -pay attention to PSB packets, but just decodes the bytes - so the packets seen -by the actual decoder may not be identical in places where the data is corrupt. -One example of that would be when the buffer-switching interrupt has been too -slow, and the buffer has been filled completely. In that case, the last packet -in the buffer might be truncated and immediately followed by a PSB as the trace -continues in the next buffer. - -To disable the display of Intel PT packets, combine the -D option with ---no-itrace. - - -perf report -=========== - -By default, perf report will decode trace data found in the perf.data file. -This can be further controlled by new option --itrace exactly the same as -perf script, with the exception that the default is --itrace=igxe. - - -perf inject -=========== - -perf inject also accepts the --itrace option in which case tracing data is -removed and replaced with the synthesized events. e.g. - - perf inject --itrace -i perf.data -o perf.data.new - -Below is an example of using Intel PT with autofdo. It requires autofdo -(https://github.com/google/autofdo) and gcc version 5. The bubble -sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) -amended to take the number of elements as a parameter. - - $ gcc-5 -O3 sort.c -o sort_optimized - $ ./sort_optimized 30000 - Bubble sorting array of 30000 elements - 2254 ms - - $ cat ~/.perfconfig - [intel-pt] - mispred-all = on - - $ perf record -e intel_pt//u ./sort 3000 - Bubble sorting array of 3000 elements - 58 ms - [ perf record: Woken up 2 times to write data ] - [ perf record: Captured and wrote 3.939 MB perf.data ] - $ perf inject -i perf.data -o inj --itrace=i100usle --strip - $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 - $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo - $ ./sort_autofdo 30000 - Bubble sorting array of 30000 elements - 2155 ms - -Note there is currently no advantage to using Intel PT instead of LBR, but -that may change in the future if greater use is made of the data. - - -PEBS via Intel PT -================= - -Some hardware has the feature to redirect PEBS records to the Intel PT trace. -Recording is selected by using the aux-output config term e.g. - - perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname - -Note that currently, software only supports redirecting at most one PEBS event. - -To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. - - perf script --itrace=oe +Documentation for support for Intel Processor Trace within perf tools' has moved to file perf-intel-pt.txt diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index e8c972f89357..1b5042f134a8 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -112,6 +112,12 @@ OPTIONS --objdump=<path>:: Path to objdump binary. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --skip-missing:: Skip symbols that cannot be annotated. diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index c4dd23c4b478..8ead55593984 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -239,7 +239,6 @@ buildid.*:: set buildid.dir to /dev/null. The default is $HOME/.debug annotate.*:: - These options work only for TUI. These are in control of addresses, jump function, source code in lines of assembly code from a specific program. @@ -269,6 +268,8 @@ annotate.*:: │ mov (%rdi),%rdx │ return n; + This option works with tui, stdio2 browsers. + annotate.use_offset:: Basing on a first address of a loaded function, offset can be used. Instead of using original addresses of assembly code, @@ -287,6 +288,8 @@ annotate.*:: 368:│ mov 0x8(%r14),%rdi + This option works with tui, stdio2 browsers. + annotate.jump_arrows:: There can be jump instruction among assembly code. Depending on a boolean value of jump_arrows, @@ -306,6 +309,8 @@ annotate.*:: │1330: mov %r15,%r10 │1333: cmp %r15,%r14 + This option works with tui browser. + annotate.show_linenr:: When showing source code if this option is 'true', line numbers are printed as below. @@ -325,6 +330,8 @@ annotate.*:: │ array++; │ } + This option works with tui, stdio2 browsers. + annotate.show_nr_jumps:: Let's see a part of assembly code. @@ -335,6 +342,8 @@ annotate.*:: │1 1382: movb $0x1,-0x270(%rbp) + This option works with tui, stdio2 browsers. + annotate.show_total_period:: To compare two records on an instruction base, with this option provided, display total number of samples that belong to a line @@ -348,11 +357,30 @@ annotate.*:: 99.93 │ mov %eax,%eax + This option works with tui, stdio2, stdio browsers. + + annotate.show_nr_samples:: + By default perf annotate shows percentage of samples. This option + can be used to print absolute number of samples. Ex, when set as + false: + + Percent│ + 74.03 │ mov %fs:0x28,%rax + + When set as true: + + Samples│ + 6 │ mov %fs:0x28,%rax + + This option works with tui, stdio2, stdio browsers. + annotate.offset_level:: Default is '1', meaning just jump targets will have offsets show right beside the instruction. When set to '2' 'call' instructions will also have its offsets shown, 3 or higher will show offsets for all instructions. + This option works with tui, stdio2 browsers. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - @@ -490,6 +518,12 @@ top.*:: column by default. The default is 'true'. + top.call-graph:: + This is identical to 'call-graph.record-mode', except it is + applicable only for 'top' subcommand. This option ONLY setup + the unwind method. To enable 'perf top' to actually use it, + the command line option -g must be specified. + man.*:: man.viewer:: This option can assign a tool to view manual pages when 'help' @@ -517,6 +551,16 @@ record.*:: But if this option is 'no-cache', it will not update the build-id cache. 'skip' skips post-processing and does not update the cache. + record.call-graph:: + This is identical to 'call-graph.record-mode', except it is + applicable only for 'record' subcommand. This option ONLY setup + the unwind method. To enable 'perf record' to actually use it, + the command line option -g must be specified. + + record.aio:: + Use 'n' control blocks in asynchronous (Posix AIO) trace writing + mode ('n' default: 1, max: 4). + diff.*:: diff.order:: This option sets the number of columns to sort the result. @@ -566,6 +610,11 @@ trace.*:: "libbeauty", the default, to use the same argument beautifiers used in the strace-like sys_enter+sys_exit lines. +ftrace.*:: + ftrace.tracer:: + Can be used to select the default tracer. Possible values are + 'function' and 'function_graph'. + llvm.*:: llvm.clang-path:: Path to clang. If omit, search it from $PATH. @@ -610,6 +659,29 @@ scripts.*:: The script gets the same options passed as a full perf script, in particular -i perfdata file, --cpu, --tid +convert.*:: + + convert.queue-size:: + Limit the size of ordered_events queue, so we could control + allocation size of perf data files without proper finished + round events. + +intel-pt.*:: + + intel-pt.cache-divisor:: + + intel-pt.mispred-all:: + If set, Intel PT decoder will set the mispred flag on all + branches. + +auxtrace.*:: + + auxtrace.dumpdir:: + s390 only. The directory to save the auxiliary trace buffer + can be changed using this option. Ex, auxtrace.dumpdir=/tmp. + If the directory does not exist or has the wrong file type, + the current directory is used. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index a64d6588470e..70969ea73e01 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -66,4 +66,5 @@ include::itrace.txt[] SEE ALSO -------- -linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] +linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1], +linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt new file mode 100644 index 000000000000..456fdcbf26ac --- /dev/null +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -0,0 +1,1007 @@ +perf-intel-pt(1) +================ + +NAME +---- +perf-intel-pt - Support for Intel Processor Trace within perf tools + +SYNOPSIS +-------- +[verse] +'perf record' -e intel_pt// + +DESCRIPTION +----------- + +Intel Processor Trace (Intel PT) is an extension of Intel Architecture that +collects information about software execution such as control flow, execution +modes and timings and formats it into highly compressed binary packets. +Technical details are documented in the Intel 64 and IA-32 Architectures +Software Developer Manuals, Chapter 36 Intel Processor Trace. + +Intel PT is first supported in Intel Core M and 5th generation Intel Core +processors that are based on the Intel micro-architecture code name Broadwell. + +Trace data is collected by 'perf record' and stored within the perf.data file. +See below for options to 'perf record'. + +Trace data must be 'decoded' which involves walking the object code and matching +the trace data packets. For example a TNT packet only tells whether a +conditional branch was taken or not taken, so to make use of that packet the +decoder must know precisely which instruction was being executed. + +Decoding is done on-the-fly. The decoder outputs samples in the same format as +samples output by perf hardware events, for example as though the "instructions" +or "branches" events had been recorded. Presently 3 tools support this: +'perf script', 'perf report' and 'perf inject'. See below for more information +on using those tools. + +The main distinguishing feature of Intel PT is that the decoder can determine +the exact flow of software execution. Intel PT can be used to understand why +and how did software get to a certain point, or behave a certain way. The +software does not have to be recompiled, so Intel PT works with debug or release +builds, however the executed images are needed - which makes use in JIT-compiled +environments, or with self-modified code, a challenge. Also symbols need to be +provided to make sense of addresses. + +A limitation of Intel PT is that it produces huge amounts of trace data +(hundreds of megabytes per second per core) which takes a long time to decode, +for example two or three orders of magnitude longer than it took to collect. +Another limitation is the performance impact of tracing, something that will +vary depending on the use-case and architecture. + + +Quickstart +---------- + +It is important to start small. That is because it is easy to capture vastly +more data than can possibly be processed. + +The simplest thing to do with Intel PT is userspace profiling of small programs. +Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: + + perf record -e intel_pt//u ls + +And profiled with 'perf report' e.g. + + perf report + +To also trace kernel space presents a problem, namely kernel self-modifying +code. A fairly good kernel image is available in /proc/kcore but to get an +accurate image a copy of /proc/kcore needs to be made under the same conditions +as the data capture. A script perf-with-kcore can do that, but beware that the +script makes use of 'sudo' to copy /proc/kcore. If you have perf installed +locally from the source tree you can do: + + ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls + +which will create a directory named 'pt_ls' and put the perf.data file and +copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use +'perf report' becomes: + + ~/libexec/perf-core/perf-with-kcore report pt_ls + +Because samples are synthesized after-the-fact, the sampling period can be +selected for reporting. e.g. sample every microsecond + + ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge + +See the sections below for more information about the --itrace option. + +Beware the smaller the period, the more samples that are produced, and the +longer it takes to process them. + +Also note that the coarseness of Intel PT timing information will start to +distort the statistical value of the sampling as the sampling period becomes +smaller. + +To represent software control flow, "branches" samples are produced. By default +a branch sample is synthesized for every single branch. To get an idea what +data is available you can use the 'perf script' tool with all itrace sampling +options, which will list all the samples. + + perf record -e intel_pt//u ls + perf script --itrace=ibxwpe + +An interesting field that is not printed by default is 'flags' which can be +displayed as follows: + + perf script --itrace=ibxwpe -F+flags + +The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, and +in transaction, respectively. + +Another interesting field that is not printed by default is 'ipc' which can be +displayed as follows: + + perf script --itrace=be -F+ipc + +There are two ways that instructions-per-cycle (IPC) can be calculated depending +on the recording. + +If the 'cyc' config term (see config terms section below) was used, then IPC is +calculated using the cycle count from CYC packets, otherwise MTC packets are +used - refer to the 'mtc' config term. When MTC is used, however, the values +are less accurate because the timing is less accurate. + +Because Intel PT does not update the cycle count on every branch or instruction, +the values will often be zero. When there are values, they will be the number +of instructions and number of cycles since the last update, and thus represent +the average IPC since the last IPC for that event type. Note IPC for "branches" +events is calculated separately from IPC for "instructions" events. + +Also note that the IPC instruction count may or may not include the current +instruction. If the cycle count is associated with an asynchronous branch +(e.g. page fault or interrupt), then the instruction count does not include the +current instruction, otherwise it does. That is consistent with whether or not +that instruction has retired when the cycle count is updated. + +Another note, in the case of "branches" events, non-taken branches are not +presently sampled, so IPC values for them do not appear e.g. a CYC packet with a +TNT packet that starts with a non-taken branch. To see every possible IPC +value, "instructions" events can be used e.g. --itrace=i0ns + +While it is possible to create scripts to analyze the data, an alternative +approach is available to export the data to a sqlite or postgresql database. +Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, +and to script exported-sql-viewer.py for an example of using the database. + +There is also script intel-pt-events.py which provides an example of how to +unpack the raw data for power events and PTWRITE. + +As mentioned above, it is easy to capture too much data. One way to limit the +data captured is to use 'snapshot' mode which is explained further below. +Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. + +Another problem that will be experienced is decoder errors. They can be caused +by inability to access the executed image, self-modified or JIT-ed code, or the +inability to match side-band information (such as context switches and mmaps) +which results in the decoder not knowing what code was executed. + +There is also the problem of perf not being able to copy the data fast enough, +resulting in data lost because the buffer was full. See 'Buffer handling' below +for more details. + + +perf record +----------- + +new event +~~~~~~~~~ + +The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are +selected by providing the PMU name followed by the "config" separated by slashes. +An enhancement has been made to allow default "config" e.g. the option + + -e intel_pt// + +will use a default config value. Currently that is the same as + + -e intel_pt/tsc,noretcomp=0/ + +which is the same as + + -e intel_pt/tsc=1,noretcomp=0/ + +Note there are now new config terms - see section 'config terms' further below. + +The config terms are listed in /sys/devices/intel_pt/format. They are bit +fields within the config member of the struct perf_event_attr which is +passed to the kernel by the perf_event_open system call. They correspond to bit +fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: + + $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* + /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 + /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 + /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 + /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 + /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 + /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 + /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 + +Note that the default config must be overridden for each term i.e. + + -e intel_pt/noretcomp=0/ + +is the same as: + + -e intel_pt/tsc=1,noretcomp=0/ + +So, to disable TSC packets use: + + -e intel_pt/tsc=0/ + +It is also possible to specify the config value explicitly: + + -e intel_pt/config=0x400/ + +Note that, as with all events, the event is suffixed with event modifiers: + + u userspace + k kernel + h hypervisor + G guest + H host + p precise ip + +'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. +'p' is also not relevant to Intel PT. So only options 'u' and 'k' are +meaningful for Intel PT. + +perf_event_attr is displayed if the -vv option is used e.g. + + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + + +config terms +~~~~~~~~~~~~ + +The June 2015 version of Intel 64 and IA-32 Architectures Software Developer +Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. +Some of the features are reflect in new config terms. All the config terms are +described below. + +tsc Always supported. Produces TSC timestamp packets to provide + timing information. In some cases it is possible to decode + without timing information, for example a per-thread context + that does not overlap executable memory maps. + + The default config selects tsc (i.e. tsc=1). + +noretcomp Always supported. Disables "return compression" so a TIP packet + is produced when a function returns. Causes more packets to be + produced but might make decoding more reliable. + + The default config does not select noretcomp (i.e. noretcomp=0). + +psb_period Allows the frequency of PSB packets to be specified. + + The PSB packet is a synchronization packet that provides a + starting point for decoding or recovery from errors. + + Support for psb_period is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and "0" + otherwise. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The psb_period value is converted to the approximate number of + trace bytes between PSB packets as: + + 2 ^ (value + 11) + + e.g. value 3 means 16KiB bytes between PSBs + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/psb_period=15/u uname + Invalid psb_period for intel_pt. Valid values are: 0-5 + + If MTC packets are selected, the default config selects a value + of 3 (i.e. psb_period=3) or the nearest lower value that is + supported (0 is always supported). Otherwise the default is 0. + + If decoding is expected to be reliable and the buffer is large + then a large PSB period can be used. + + Because a TSC packet is produced with PSB, the PSB period can + also affect the granularity to timing information in the absence + of MTC or CYC. + +mtc Produces MTC timing packets. + + MTC packets provide finer grain timestamp information than TSC + packets. MTC packets record time using the hardware crystal + clock (CTC) which is related to TSC packets using a TMA packet. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc + + which contains "1" if the feature is supported and + "0" otherwise. + + The frequency of MTC packets can also be specified - see + mtc_period below. + +mtc_period Specifies how frequently MTC packets are produced - see mtc + above for how to determine if MTC packets are supported. + + Valid values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/mtc_periods + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The mtc_period value is converted to the MTC frequency as: + + CTC-frequency / (2 ^ value) + + e.g. value 3 means one eighth of CTC-frequency + + Where CTC is the hardware crystal clock, the frequency of which + can be related to TSC via values provided in cpuid leaf 0x15. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/mtc_period=15/u uname + Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 + + The default value is 3 or the nearest lower value + that is supported (0 is always supported). + +cyc Produces CYC timing packets. + + CYC packets provide even finer grain timestamp information than + MTC and TSC packets. A CYC packet contains the number of CPU + cycles since the last CYC packet. Unlike MTC and TSC packets, + CYC packets are only sent when another packet is also sent. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/psb_cyc + + which contains "1" if the feature is supported and + "0" otherwise. + + The number of CYC packets produced can be reduced by specifying + a threshold - see cyc_thresh below. + +cyc_thresh Specifies how frequently CYC packets are produced - see cyc + above for how to determine if CYC packets are supported. + + Valid cyc_thresh values are given by: + + /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds + + which contains a hexadecimal value, the bits of which represent + valid values e.g. bit 2 set means value 2 is valid. + + The cyc_thresh value represents the minimum number of CPU cycles + that must have passed before a CYC packet can be sent. The + number of CPU cycles is: + + 2 ^ (value - 1) + + e.g. value 4 means 8 CPU cycles must pass before a CYC packet + can be sent. Note a CYC packet is still only sent when another + packet is sent, not at, e.g. every 8 CPU cycles. + + If an invalid value is entered, the error message + will give a list of valid values e.g. + + $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname + Invalid cyc_thresh for intel_pt. Valid values are: 0-12 + + CYC packets are not requested by default. + +pt Specifies pass-through which enables the 'branch' config term. + + The default config selects 'pt' if it is available, so a user will + never need to specify this term. + +branch Enable branch tracing. Branch tracing is enabled by default so to + disable branch tracing use 'branch=0'. + + The default config selects 'branch' if it is available. + +ptw Enable PTWRITE packets which are produced when a ptwrite instruction + is executed. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/ptwrite + + which contains "1" if the feature is supported and + "0" otherwise. + +fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet + provides the address of the ptwrite instruction. In the absence of + fup_on_ptw, the decoder will use the address of the previous branch + if branch tracing is enabled, otherwise the address will be zero. + Note that fup_on_ptw will work even when branch tracing is disabled. + +pwr_evt Enable power events. The power events provide information about + changes to the CPU C-state. + + Support for this feature is indicated by: + + /sys/bus/event_source/devices/intel_pt/caps/power_event_trace + + which contains "1" if the feature is supported and + "0" otherwise. + + +AUX area sampling option +~~~~~~~~~~~~~~~~~~~~~~~~ + +To select Intel PT "sampling" the AUX area sampling option can be used: + + --aux-sample + +Optionally it can be followed by the sample size in bytes e.g. + + --aux-sample=8192 + +In addition, the Intel PT event to sample must be defined e.g. + + -e intel_pt//u + +Samples on other events will be created containing Intel PT data e.g. the +following will create Intel PT samples on the branch-misses event, note the +events must be grouped using {}: + + perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' + +An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to +events. In this case, the grouping is implied e.g. + + perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u + +is the same as: + + perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}' + +but allows for also using an address filter e.g.: + + perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls + +It is important to select a sample size that is big enough to contain at least +one PSB packet. If not a warning will be displayed: + + Intel PT sample size (%zu) may be too small for PSB period (%zu) + +The calculation used for that is: if sample_size <= psb_period + 256 display the +warning. When sampling is used, psb_period defaults to 0 (2KiB). + +The default sample size is 4KiB. + +The sample size is passed in aux_sample_size in struct perf_event_attr. The +sample size is limited by the maximum event size which is 64KiB. It is +difficult to know how big the event might be without the trace sample attached, +but the tool validates that the sample size is not greater than 60KiB. + + +new snapshot option +~~~~~~~~~~~~~~~~~~~ + +The difference between full trace and snapshot from the kernel's perspective is +that in full trace we don't overwrite trace data that the user hasn't collected +yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let +the trace run and overwrite older data in the buffer so that whenever something +interesting happens, we can stop it and grab a snapshot of what was going on +around that interesting moment. + +To select snapshot mode a new option has been added: + + -S + +Optionally it can be followed by the snapshot size e.g. + + -S0x100000 + +The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size +nor snapshot size is specified, then the default is 4MiB for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced as described in the 'new auxtrace mmap size option' section below. + +The snapshot size is displayed if the option -vv is used e.g. + + Intel PT snapshot size: %zu + + +new auxtrace mmap size option +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Intel PT buffer size is specified by an addition to the -m option e.g. + + -m,16 + +selects a buffer size of 16 pages i.e. 64KiB. + +Note that the existing functionality of -m is unchanged. The auxtrace mmap size +is specified by the optional addition of a comma and the value. + +The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users +(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. +If an unprivileged user does not specify mmap pages, the mmap pages will be +reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the +user is likely to get an error as they exceed their mlock limit (Max locked +memory as shown in /proc/self/limits). Note that perf does not count the first +512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu +against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus +their mlock limit (which defaults to 64KiB but is not multiplied by the number +of cpus). + +In full-trace mode, powers of two are allowed for buffer size, with a minimum +size of 2 pages. In snapshot mode or sampling mode, it is the same but the +minimum size is 1 page. + +The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. + + mmap length 528384 + auxtrace mmap length 4198400 + + +Intel PT modes of operation +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Intel PT can be used in 2 modes: + full-trace mode + sample mode + snapshot mode + +Full-trace mode traces continuously e.g. + + perf record -e intel_pt//u uname + +Sample mode attaches a Intel PT sample to other events e.g. + + perf record --aux-sample -e intel_pt//u -e branch-misses:u + +Snapshot mode captures the available data when a signal is sent e.g. + + perf record -v -e intel_pt//u -S ./loopy 1000000000 & + [1] 11435 + kill -USR2 11435 + Recording AUX area tracing snapshot + +Note that the signal sent is SIGUSR2. +Note that "Recording AUX area tracing snapshot" is displayed because the -v +option is used. + +The 2 modes cannot be used together. + + +Buffer handling +~~~~~~~~~~~~~~~ + +There may be buffer limitations (i.e. single ToPa entry) which means that actual +buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to +provide other sizes, and in particular an arbitrarily large size, multiple +buffers are logically concatenated. However an interrupt must be used to switch +between buffers. That has two potential problems: + a) the interrupt may not be handled in time so that the current buffer + becomes full and some trace data is lost. + b) the interrupts may slow the system and affect the performance + results. + +If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event +which the tools report as an error. + +In full-trace mode, the driver waits for data to be copied out before allowing +the (logical) buffer to wrap-around. If data is not copied out quickly enough, +again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to +wait, the intel_pt event gets disabled. Because it is difficult to know when +that happens, perf tools always re-enable the intel_pt event after copying out +data. + + +Intel PT and build ids +~~~~~~~~~~~~~~~~~~~~~~ + +By default "perf record" post-processes the event stream to find all build ids +for executables for all addresses sampled. Deliberately, Intel PT is not +decoded for that purpose (it would take too long). Instead the build ids for +all executables encountered (due to mmap, comm or task events) are included +in the perf.data file. + +To see buildids included in the perf.data file use the command: + + perf buildid-list + +If the perf.data file contains Intel PT data, that is the same as: + + perf buildid-list --with-hits + + +Snapshot mode and event disabling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to make a snapshot, the intel_pt event is disabled using an IOCTL, +namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the +collection of side-band information. In order to prevent that, a dummy +software event has been introduced that permits tracking events (like mmaps) to +continue to be recorded while intel_pt is disabled. That is important to ensure +there is complete side-band information to allow the decoding of subsequent +snapshots. + +A test has been created for that. To find the test: + + perf test list + ... + 23: Test using a dummy software event to keep tracking + +To run the test: + + perf test 23 + 23: Test using a dummy software event to keep tracking : Ok + + +perf record modes (nothing new here) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +perf record essentially operates in one of three modes: + per thread + per cpu + workload only + +"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a +workload). +"per cpu" is selected by -C or -a. +"workload only" mode is selected by not using the other options but providing a +command to run (i.e. the workload). + +In per-thread mode an exact list of threads is traced. There is no inheritance. +Each thread has its own event buffer. + +In per-cpu mode all processes (or processes from the selected cgroup i.e. -G +option, or processes selected with -p or -u) are traced. Each cpu has its own +buffer. Inheritance is allowed. + +In workload-only mode, the workload is traced but with per-cpu buffers. +Inheritance is allowed. Note that you can now trace a workload in per-thread +mode by using the --per-thread option. + + +Privileged vs non-privileged users +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users +have memory limits imposed upon them. That affects what buffer sizes they can +have as outlined above. + +The v4.2 kernel introduced support for a context switch metadata event, +PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes +are scheduled out and in, just not by whom, which is left for the +PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context, +which in turn requires CAP_SYS_ADMIN. + +Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context +switches") commit, that introduces these metadata events for further info. + +When working with kernels < v4.2, the following considerations must be taken, +as the sched:sched_switch tracepoints will be used to receive such information: + +Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are +not permitted to use tracepoints which means there is insufficient side-band +information to decode Intel PT in per-cpu mode, and potentially workload-only +mode too if the workload creates new processes. + +Note also, that to use tracepoints, read-access to debugfs is required. So if +debugfs is not mounted or the user does not have read-access, it will again not +be possible to decode Intel PT in per-cpu mode. + + +sched_switch tracepoint +~~~~~~~~~~~~~~~~~~~~~~~ + +The sched_switch tracepoint is used to provide side-band data for Intel PT +decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't +available. + +The sched_switch events are automatically added. e.g. the second event shown +below: + + $ perf record -vv -e intel_pt//u uname + ------------------------------------------------------------ + perf_event_attr: + type 6 + size 112 + config 0x400 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + enable_on_exec 1 + sample_id_all 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 2 + size 112 + config 0x108 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER + read_format ID + inherit 1 + sample_id_all 1 + exclude_guest 1 + ------------------------------------------------------------ + sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 + ------------------------------------------------------------ + perf_event_attr: + type 1 + size 112 + config 0x9 + { sample_period, sample_freq } 1 + sample_type IP|TID|TIME|IDENTIFIER + read_format ID + disabled 1 + inherit 1 + exclude_kernel 1 + exclude_hv 1 + mmap 1 + comm 1 + enable_on_exec 1 + task 1 + sample_id_all 1 + mmap2 1 + comm_exec 1 + ------------------------------------------------------------ + sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 + sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 + mmap size 528384B + AUX area mmap length 4194304 + perf event ring buffer mmapped per cpu + Synthesizing auxtrace information + Linux + [ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 0.042 MB perf.data ] + +Note, the sched_switch event is only added if the user is permitted to use it +and only in per-cpu mode. + +Note also, the sched_switch event is only added if TSC packets are requested. +That is because, in the absence of timing information, the sched_switch events +cannot be matched against the Intel PT trace. + + +perf script +----------- + +By default, perf script will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace. + + +New --itrace option +~~~~~~~~~~~~~~~~~~~ + +Having no option is the same as + + --itrace + +which, in turn, is the same as + + --itrace=cepwx + +The letters are: + + i synthesize "instructions" events + b synthesize "branches" events + x synthesize "transactions" events + w synthesize "ptwrite" events + p synthesize "power" events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + e synthesize tracing error events + d create a debug log + g synthesize a call chain (use with i or x) + l synthesize last branch entries (use with i or x) + s skip initial number of events + +"Instructions" events look like they were recorded by "perf record -e +instructions". + +"Branches" events look like they were recorded by "perf record -e branches". "c" +and "r" can be combined to get calls and returns. + +"Transactions" events correspond to the start or end of transactions. The +'flags' field can be used in perf script to determine whether the event is a +tranasaction start, commit or abort. + +Note that "instructions", "branches" and "transactions" events depend on code +flow packets which can be disabled by using the config term "branch=0". Refer +to the config terms section above. + +"ptwrite" events record the payload of the ptwrite instruction and whether +"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are +recorded only if the "ptw" config term was used. Refer to the config terms +section above. perf script "synth" field displays "ptwrite" information like +this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was +used. + +"Power" events correspond to power event packets and CBR (core-to-bus ratio) +packets. While CBR packets are always recorded when tracing is enabled, power +event packets are recorded only if the "pwr_evt" config term was used. Refer to +the config terms section above. The power events record information about +C-state changes, whereas CBR is indicative of CPU frequency. perf script +"event,synth" fields display information like this: + cbr: cbr: 22 freq: 2189 MHz (200%) + mwait: hints: 0x60 extensions: 0x1 + pwre: hw: 0 cstate: 2 sub-cstate: 0 + exstop: ip: 1 + pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4 +Where: + "cbr" includes the frequency and the percentage of maximum non-turbo + "mwait" shows mwait hints and extensions + "pwre" shows C-state transitions (to a C-state deeper than C0) and + whether initiated by hardware + "exstop" indicates execution stopped and whether the IP was recorded + exactly, + "pwrx" indicates return to C0 +For more details refer to the Intel 64 and IA-32 Architectures Software +Developer Manuals. + +Error events show where the decoder lost the trace. Error events +are quite important. Users must know if what they are seeing is a complete +picture or not. + +The "d" option will cause the creation of a file "intel_pt.log" containing all +decoded packets and instructions. Note that this option slows down the decoder +and that the resulting file may be very large. + +In addition, the period of the "instructions" event can be specified. e.g. + + --itrace=i10us + +sets the period to 10us i.e. one instruction sample is synthesized for each 10 +microseconds of trace. Alternatives to "us" are "ms" (milliseconds), +"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). + +"ms", "us" and "ns" are converted to TSC ticks. + +The timing information included with Intel PT does not give the time of every +instruction. Consequently, for the purpose of sampling, the decoder estimates +the time since the last timing packet based on 1 tick per instruction. The time +on the sample is *not* adjusted and reflects the last known value of TSC. + +For Intel PT, the default period is 100us. + +Setting it to a zero period means "as often as possible". + +In the case of Intel PT that is the same as a period of 1 and a unit of +'instructions' (i.e. --itrace=i1i). + +Also the call chain size (default 16, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=ig32 + --itrace=xg32 + +Also the number of last branch entries (default 64, max. 1024) for instructions or +transactions events can be specified. e.g. + + --itrace=il10 + --itrace=xl10 + +Note that last branch entries are cleared for each sample, so there is no overlap +from one sample to the next. + +To disable trace decoding entirely, use the option --no-itrace. + +It is also possible to skip events generated (instructions, branches, transactions) +at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + +skips the first million instructions. + +dump option +~~~~~~~~~~~ + +perf script has an option (-D) to "dump" the events i.e. display the binary +data. + +When -D is used, Intel PT packets are displayed. The packet decoder does not +pay attention to PSB packets, but just decodes the bytes - so the packets seen +by the actual decoder may not be identical in places where the data is corrupt. +One example of that would be when the buffer-switching interrupt has been too +slow, and the buffer has been filled completely. In that case, the last packet +in the buffer might be truncated and immediately followed by a PSB as the trace +continues in the next buffer. + +To disable the display of Intel PT packets, combine the -D option with +--no-itrace. + + +perf report +----------- + +By default, perf report will decode trace data found in the perf.data file. +This can be further controlled by new option --itrace exactly the same as +perf script, with the exception that the default is --itrace=igxe. + + +perf inject +----------- + +perf inject also accepts the --itrace option in which case tracing data is +removed and replaced with the synthesized events. e.g. + + perf inject --itrace -i perf.data -o perf.data.new + +Below is an example of using Intel PT with autofdo. It requires autofdo +(https://github.com/google/autofdo) and gcc version 5. The bubble +sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial) +amended to take the number of elements as a parameter. + + $ gcc-5 -O3 sort.c -o sort_optimized + $ ./sort_optimized 30000 + Bubble sorting array of 30000 elements + 2254 ms + + $ cat ~/.perfconfig + [intel-pt] + mispred-all = on + + $ perf record -e intel_pt//u ./sort 3000 + Bubble sorting array of 3000 elements + 58 ms + [ perf record: Woken up 2 times to write data ] + [ perf record: Captured and wrote 3.939 MB perf.data ] + $ perf inject -i perf.data -o inj --itrace=i100usle --strip + $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1 + $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo + $ ./sort_autofdo 30000 + Bubble sorting array of 30000 elements + 2155 ms + +Note there is currently no advantage to using Intel PT instead of LBR, but +that may change in the future if greater use is made of the data. + + +PEBS via Intel PT +----------------- + +Some hardware has the feature to redirect PEBS records to the Intel PT trace. +Recording is selected by using the aux-output config term e.g. + + perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname + +Note that currently, software only supports redirecting at most one PEBS event. + +To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. + + perf script --itrace=oe + + +SEE ALSO +-------- + +linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1], +linkperf:perf-inject[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index b23a4012a606..7f4db7592467 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -589,4 +589,4 @@ appended unit character - B/K/M/G SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-list[1] +linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8dbe2119686a..bd0a029d4c08 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -367,6 +367,12 @@ OPTIONS --objdump=<path>:: Path to objdump binary. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --group:: Show event group information together. It forces group output also if there are no groups defined in data file. @@ -540,4 +546,5 @@ include::callchain-overhead-calculation.txt[] SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1], +linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 63f938b887dd..5fbe42bd599b 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -110,6 +110,10 @@ OPTIONS for 'perf sched timehist' --max-stack:: Maximum number of functions to display in backtrace, default 5. +-C=:: +--cpu=:: + Only show events for the given CPU(s) (comma separated list). + -p=:: --pid=:: Only show events for given process ID (comma separated list). diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2599b057e47b..db6a36aac47e 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -429,4 +429,4 @@ include::itrace.txt[] SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], -linkperf:perf-script-python[1] +linkperf:perf-script-python[1], linkperf:perf-intel-pt[1] diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 9431b8066fb4..4d56586b2fb9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -334,6 +334,15 @@ Configure all used events to run in kernel space. --all-user:: Configure all used events to run in user space. +--percore-show-thread:: +The event modifier "percore" has supported to sum up the event counts +for all hardware threads in a core and show the counts per core. + +This option with event modifier "percore" enabled also sums up the event +counts for all hardware threads in a core but show the sum counts per +hardware thread. This is essentially a replacement for the any bit and +convenient for post processing. + EXAMPLES -------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 5596129a71cf..324b6b53c86b 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -158,6 +158,12 @@ Default is to monitor all CPUS. -M:: --disassembler-style=:: Set disassembler style for objdump. +--prefix=PREFIX:: +--prefix-strip=N:: + Remove first N entries from source file path names in executables + and add PREFIX. This allows to display source code compiled on systems + with different file system layout. + --source:: Interleave source code with assembly code. Enabled by default, disable with --no-source. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 4934edb5adfd..5d7b947320fb 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -7,6 +7,7 @@ tools/lib/traceevent tools/lib/api tools/lib/bpf tools/lib/subcmd +tools/lib/perf tools/lib/argv_split.c tools/lib/ctype.c tools/lib/hweight.c diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7902a5681fc8..b8fc7d972be9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -35,7 +35,7 @@ endif # Only pass canonical directory names as the output directory: # ifneq ($(O),) - FULL_O := $(shell readlink -f $(O) || echo $(O)) + FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O)) endif # diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c90f4146e5a2..80e55e796be9 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -286,7 +286,7 @@ ifeq ($(DEBUG),0) endif endif -INC_FLAGS += -I$(src-perf)/lib/include +INC_FLAGS += -I$(srctree)/tools/lib/perf/include INC_FLAGS += -I$(src-perf)/util/include INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index eae5d5e95952..3eda9d4b88e7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -230,7 +230,7 @@ LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ BPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ -LIBPERF_DIR = $(srctree)/tools/perf/lib/ +LIBPERF_DIR = $(srctree)/tools/lib/perf/ # Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. # Without this setting the output feature dump file misses some features, for diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index ede040cf82ad..941f814820b8 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -226,7 +226,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu, if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) continue; - sink = term->val.drv_cfg; + sink = term->val.str; snprintf(path, PATH_MAX, "sinks/%s", sink); ret = perf_pmu__scan_file(pmu, path, "%x", &hash); @@ -858,21 +858,6 @@ static void cs_etm_recording_free(struct auxtrace_record *itr) free(ptr); } -static int cs_etm_read_finish(struct auxtrace_record *itr, int idx) -{ - struct cs_etm_recording *ptr = - container_of(itr, struct cs_etm_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->cs_etm_pmu->type) - return perf_evlist__enable_event_idx(ptr->evlist, - evsel, idx); - } - - return -EINVAL; -} - struct auxtrace_record *cs_etm_record_init(int *err) { struct perf_pmu *cs_etm_pmu; @@ -892,6 +877,7 @@ struct auxtrace_record *cs_etm_record_init(int *err) } ptr->cs_etm_pmu = cs_etm_pmu; + ptr->itr.pmu = cs_etm_pmu; ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options; ptr->itr.recording_options = cs_etm_recording_options; ptr->itr.info_priv_size = cs_etm_info_priv_size; @@ -901,7 +887,7 @@ struct auxtrace_record *cs_etm_record_init(int *err) ptr->itr.snapshot_finish = cs_etm_snapshot_finish; ptr->itr.reference = cs_etm_reference; ptr->itr.free = cs_etm_recording_free; - ptr->itr.read_finish = cs_etm_read_finish; + ptr->itr.read_finish = auxtrace_record__read_finish; *err = 0; return &ptr->itr; diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index eba6541ec0f1..27653be24447 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -11,17 +11,17 @@ #include <linux/zalloc.h> #include <time.h> -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/session.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/session.h" #include <internal/lib.h> // page_size -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/arm-spe.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/arm-spe.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -158,20 +158,6 @@ static void arm_spe_recording_free(struct auxtrace_record *itr) free(sper); } -static int arm_spe_read_finish(struct auxtrace_record *itr, int idx) -{ - struct arm_spe_recording *sper = - container_of(itr, struct arm_spe_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(sper->evlist, evsel) { - if (evsel->core.attr.type == sper->arm_spe_pmu->type) - return perf_evlist__enable_event_idx(sper->evlist, - evsel, idx); - } - return -EINVAL; -} - struct auxtrace_record *arm_spe_recording_init(int *err, struct perf_pmu *arm_spe_pmu) { @@ -189,12 +175,13 @@ struct auxtrace_record *arm_spe_recording_init(int *err, } sper->arm_spe_pmu = arm_spe_pmu; + sper->itr.pmu = arm_spe_pmu; sper->itr.recording_options = arm_spe_recording_options; sper->itr.info_priv_size = arm_spe_info_priv_size; sper->itr.info_fill = arm_spe_info_fill; sper->itr.free = arm_spe_recording_free; sper->itr.reference = arm_spe_reference; - sper->itr.read_finish = arm_spe_read_finish; + sper->itr.read_finish = auxtrace_record__read_finish; sper->itr.alignment = 0; *err = 0; diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index a32e4b72a98f..d730666ab95d 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -1,8 +1,10 @@ #include <stdio.h> #include <stdlib.h> #include <perf/cpumap.h> +#include <util/cpumap.h> #include <internal/cpumap.h> #include <api/fs/fs.h> +#include <errno.h> #include "debug.h" #include "header.h" @@ -12,26 +14,21 @@ #define MIDR_VARIANT_SHIFT 20 #define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) -char *get_cpuid_str(struct perf_pmu *pmu) +static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) { - char *buf = NULL; - char path[PATH_MAX]; const char *sysfs = sysfs__mountpoint(); - int cpu; u64 midr = 0; - struct perf_cpu_map *cpus; - FILE *file; + int cpu; - if (!sysfs || !pmu || !pmu->cpus) - return NULL; + if (!sysfs || sz < MIDR_SIZE) + return EINVAL; - buf = malloc(MIDR_SIZE); - if (!buf) - return NULL; + cpus = perf_cpu_map__get(cpus); - /* read midr from list of cpus mapped to this pmu */ - cpus = perf_cpu_map__get(pmu->cpus); for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { + char path[PATH_MAX]; + FILE *file; + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, sysfs, cpus->map[cpu]); @@ -57,12 +54,48 @@ char *get_cpuid_str(struct perf_pmu *pmu) break; } - if (!midr) { + perf_cpu_map__put(cpus); + + if (!midr) + return EINVAL; + + return 0; +} + +int get_cpuid(char *buf, size_t sz) +{ + struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); + int ret; + + if (!cpus) + return EINVAL; + + ret = _get_cpuid(buf, sz, cpus); + + perf_cpu_map__put(cpus); + + return ret; +} + +char *get_cpuid_str(struct perf_pmu *pmu) +{ + char *buf = NULL; + int res; + + if (!pmu || !pmu->cpus) + return NULL; + + buf = malloc(MIDR_SIZE); + if (!buf) + return NULL; + + /* read midr from list of cpus mapped to this pmu */ + res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus); + if (res) { pr_err("failed to get cpuid string for PMU %s\n", pmu->name); free(buf); buf = NULL; } - perf_cpu_map__put(cpus); return buf; } diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2864e2e3776d..2833e101a7c6 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/perf_regs.h" +#include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG_END diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 43f736ed47f2..35b61bfc1b1a 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -517,3 +517,5 @@ 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open 435 nospu clone3 ppc_clone3 +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index e9c436eeffc9..0a5242900248 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -4,8 +4,8 @@ #include <regex.h> #include <linux/zalloc.h> -#include "../../util/perf_regs.h" -#include "../../util/debug.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" #include <linux/kernel.h> diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index c29976eca4a8..44d510bc9b78 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -357,6 +357,8 @@ 433 common fspick __x64_sys_fspick 434 common pidfd_open __x64_sys_pidfd_open 435 common clone3 __x64_sys_clone3/ptregs +437 common openat2 __x64_sys_openat2 +438 common pidfd_getfd __x64_sys_pidfd_getfd # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c index e6461abc9e7b..9708ae892061 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c @@ -2085,6 +2085,118 @@ "67 f3 0f 38 f8 1c \tenqcmds (%si),%bx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "", "67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%eax)",}, +{{0x0f, 0xae, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "", +"0f ae 2d 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%eax)",}, +{{0xf3, 0x0f, 0x01, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f 01 2d 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%eax,%ecx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%eax)",}, +{{0x0f, 0x38, 0xf6, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f 38 f6 15 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%eax,%ecx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%eax)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"66 0f 38 f5 15 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%eax)",}, +{{0xf3, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"f3 0f ae 35 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%eax,%ecx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcall *%eax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcall *(%eax)",}, +{{0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "call", "indirect", +"ff 15 78 56 34 12 \tcall *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcall *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd call *%eax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd call *(%eax)",}, +{{0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"f2 ff 15 78 56 34 12 \tbnd call *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack call *%eax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack call *(%eax)",}, +{{0x3e, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"3e ff 15 78 56 34 12 \tnotrack call *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack call *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd call *%eax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd call *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e f2 ff 15 78 56 34 12 \tnotrack bnd call *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd call *0x12345678(%eax,%ecx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmp *%eax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmp *(%eax)",}, +{{0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "jmp", "indirect", +"ff 25 78 56 34 12 \tjmp *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmp *0x12345678(%eax,%ecx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmp *%eax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmp *(%eax)",}, +{{0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"f2 ff 25 78 56 34 12 \tbnd jmp *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmp *%eax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmp *(%eax)",}, +{{0x3e, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"3e ff 25 78 56 34 12 \tnotrack jmp *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmp *0x12345678(%eax,%ecx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmp *%eax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmp *(%eax)",}, +{{0x3e, 0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c index 567ecccfad7c..5da17d41d302 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c @@ -2263,6 +2263,202 @@ "67 f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",}, {{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", "67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",}, +{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "", +"f3 0f ae e8 \tincsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 41 0f ae e8 \tincsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 48 0f ae e8 \tincsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0xae, 0xe8, }, 5, 0, "", "", +"f3 49 0f ae e8 \tincsspq %r8",}, +{{0x0f, 0xae, 0x28, }, 3, 0, "", "", +"0f ae 28 \txrstor (%rax)",}, +{{0x41, 0x0f, 0xae, 0x28, }, 4, 0, "", "", +"41 0f ae 28 \txrstor (%r8)",}, +{{0x0f, 0xae, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae 2c 25 78 56 34 12 \txrstor 0x12345678",}, +{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "", +"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"41 0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0xae, 0xe8, }, 3, 0, "", "", +"0f ae e8 \tlfence ",}, +{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "", +"f3 0f 1e c8 \trdsspd %eax",}, +{{0xf3, 0x41, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 41 0f 1e c8 \trdsspd %r8d",}, +{{0xf3, 0x48, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 48 0f 1e c8 \trdsspq %rax",}, +{{0xf3, 0x49, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "", +"f3 49 0f 1e c8 \trdsspq %r8",}, +{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "", +"f3 0f 01 ea \tsaveprevssp ",}, +{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "", +"f3 0f 01 28 \trstorssp (%rax)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0x28, }, 5, 0, "", "", +"f3 41 0f 01 28 \trstorssp (%r8)",}, +{{0xf3, 0x0f, 0x01, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 2c 25 78 56 34 12 \trstorssp 0x12345678",}, +{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%r8,%rcx,8)",}, +{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "", +"0f 38 f6 08 \twrssd %ecx,(%rax)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"41 0f 38 f6 10 \twrssd %edx,(%r8)",}, +{{0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 14 25 78 56 34 12 \twrssd %edx,0x12345678",}, +{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x41, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"41 0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x08, }, 5, 0, "", "", +"48 0f 38 f6 08 \twrssq %rcx,(%rax)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "", +"49 0f 38 f6 10 \twrssq %rdx,(%r8)",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 14 25 78 56 34 12 \twrssq %rdx,0x12345678",}, +{{0x48, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"48 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x49, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"49 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "", +"66 0f 38 f5 08 \twrussd %ecx,(%rax)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 41 0f 38 f5 10 \twrussd %edx,(%r8)",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 14 25 78 56 34 12 \twrussd %edx,0x12345678",}, +{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 41 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%r8,%rcx,8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x08, }, 6, 0, "", "", +"66 48 0f 38 f5 08 \twrussq %rcx,(%rax)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "", +"66 49 0f 38 f5 10 \twrussq %rdx,(%r8)",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 14 25 78 56 34 12 \twrussq %rdx,0x12345678",}, +{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 48 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%rax,%rcx,8)",}, +{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "", +"66 49 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "", +"f3 0f 01 e8 \tsetssbsy ",}, +{{0x0f, 0x01, 0xee, }, 3, 0, "", "", +"0f 01 ee \trdpkru ",}, +{{0x0f, 0x01, 0xef, }, 3, 0, "", "", +"0f 01 ef \twrpkru ",}, +{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "", +"f3 0f ae 30 \tclrssbsy (%rax)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "", +"f3 41 0f ae 30 \tclrssbsy (%r8)",}, +{{0xf3, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae 34 25 78 56 34 12 \tclrssbsy 0x12345678",}, +{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "", +"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%rax,%rcx,8)",}, +{{0xf3, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "", +"f3 41 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%r8,%rcx,8)",}, +{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "", +"f3 0f 1e fb \tendbr32 ",}, +{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "", +"f3 0f 1e fa \tendbr64 ",}, +{{0xff, 0xd0, }, 2, 0, "call", "indirect", +"ff d0 \tcallq *%rax",}, +{{0xff, 0x10, }, 2, 0, "call", "indirect", +"ff 10 \tcallq *(%rax)",}, +{{0x41, 0xff, 0x10, }, 3, 0, "call", "indirect", +"41 ff 10 \tcallq *(%r8)",}, +{{0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 14 25 78 56 34 12 \tcallq *0x12345678",}, +{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect", +"ff 94 c8 78 56 34 12 \tcallq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"41 ff 94 c8 78 56 34 12 \tcallq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"f2 ff d0 \tbnd callq *%rax",}, +{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect", +"f2 ff 10 \tbnd callq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"f2 41 ff 10 \tbnd callq *(%r8)",}, +{{0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 14 25 78 56 34 12 \tbnd callq *0x12345678",}, +{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"f2 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"f2 41 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect", +"3e ff d0 \tnotrack callq *%rax",}, +{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect", +"3e ff 10 \tnotrack callq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e 41 ff 10 \tnotrack callq *(%r8)",}, +{{0x3e, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 14 25 78 56 34 12 \tnotrack callq *0x12345678",}, +{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect", +"3e ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e 41 ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect", +"3e f2 ff d0 \tnotrack bnd callq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect", +"3e f2 ff 10 \tnotrack bnd callq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x10, }, 5, 0, "call", "indirect", +"3e f2 41 ff 10 \tnotrack bnd callq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 14 25 78 56 34 12 \tnotrack bnd callq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect", +"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "call", "indirect", +"3e f2 41 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%r8,%rcx,8)",}, +{{0xff, 0xe0, }, 2, 0, "jmp", "indirect", +"ff e0 \tjmpq *%rax",}, +{{0xff, 0x20, }, 2, 0, "jmp", "indirect", +"ff 20 \tjmpq *(%rax)",}, +{{0x41, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"41 ff 20 \tjmpq *(%r8)",}, +{{0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff 24 25 78 56 34 12 \tjmpq *0x12345678",}, +{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect", +"ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%rax,%rcx,8)",}, +{{0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"41 ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%r8,%rcx,8)",}, +{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"f2 ff e0 \tbnd jmpq *%rax",}, +{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"f2 ff 20 \tbnd jmpq *(%rax)",}, +{{0xf2, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"f2 41 ff 20 \tbnd jmpq *(%r8)",}, +{{0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff 24 25 78 56 34 12 \tbnd jmpq *0x12345678",}, +{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"f2 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"f2 41 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect", +"3e ff e0 \tnotrack jmpq *%rax",}, +{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect", +"3e ff 20 \tnotrack jmpq *(%rax)",}, +{{0x3e, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e 41 ff 20 \tnotrack jmpq *(%r8)",}, +{{0x3e, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff 24 25 78 56 34 12 \tnotrack jmpq *0x12345678",}, +{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect", +"3e ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e 41 ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%r8,%rcx,8)",}, +{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect", +"3e f2 ff e0 \tnotrack bnd jmpq *%rax",}, +{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect", +"3e f2 ff 20 \tnotrack bnd jmpq *(%rax)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0x20, }, 5, 0, "jmp", "indirect", +"3e f2 41 ff 20 \tnotrack bnd jmpq *(%r8)",}, +{{0x3e, 0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff 24 25 78 56 34 12 \tnotrack bnd jmpq *0x12345678",}, +{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect", +"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",}, +{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect", +"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",}, {{0x0f, 0x01, 0xcf, }, 3, 0, "", "", "0f 01 cf \tencls ",}, {{0x0f, 0x01, 0xd7, }, 3, 0, "", "", diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c index ddbf07c50bb8..c3808e94c46e 100644 --- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c +++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c @@ -1771,6 +1771,145 @@ int main(void) asm volatile("enqcmds (%eax),%ebx"); asm volatile("enqcmds 0x12345678(%eax),%ecx"); + /* incsspd/q */ + + asm volatile("incsspd %eax"); + asm volatile("incsspd %r8d"); + asm volatile("incsspq %rax"); + asm volatile("incsspq %r8"); + /* Also check instructions in the same group encoding as incsspd/q */ + asm volatile("xrstor (%rax)"); + asm volatile("xrstor (%r8)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%rax,%rcx,8)"); + asm volatile("xrstor 0x12345678(%r8,%rcx,8)"); + asm volatile("lfence"); + + /* rdsspd/q */ + + asm volatile("rdsspd %eax"); + asm volatile("rdsspd %r8d"); + asm volatile("rdsspq %rax"); + asm volatile("rdsspq %r8"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%rax)"); + asm volatile("rstorssp (%r8)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%rax,%rcx,8)"); + asm volatile("rstorssp 0x12345678(%r8,%rcx,8)"); + + /* wrssd/q */ + + asm volatile("wrssd %ecx,(%rax)"); + asm volatile("wrssd %edx,(%r8)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrssq %rcx,(%rax)"); + asm volatile("wrssq %rdx,(%r8)"); + asm volatile("wrssq %rdx,(0x12345678)"); + asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)"); + + /* wrussd/q */ + + asm volatile("wrussd %ecx,(%rax)"); + asm volatile("wrussd %edx,(%r8)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)"); + asm volatile("wrussq %rcx,(%rax)"); + asm volatile("wrussq %rdx,(%r8)"); + asm volatile("wrussq %rdx,(0x12345678)"); + asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)"); + asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%rax)"); + asm volatile("clrssbsy (%r8)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)"); + asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd callq *%rax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%rax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(%r8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */ + #else /* #ifdef __x86_64__ */ /* bound r32, mem (same op code as EVEX prefix) */ @@ -3434,6 +3573,103 @@ int main(void) asm volatile("enqcmds (%si),%bx"); asm volatile("enqcmds 0x1234(%si),%cx"); + /* incsspd */ + + asm volatile("incsspd %eax"); + /* Also check instructions in the same group encoding as incsspd */ + asm volatile("xrstor (%eax)"); + asm volatile("xrstor (0x12345678)"); + asm volatile("xrstor 0x12345678(%eax,%ecx,8)"); + asm volatile("lfence"); + + /* rdsspd */ + + asm volatile("rdsspd %eax"); + + /* saveprevssp */ + + asm volatile("saveprevssp"); + + /* rstorssp */ + + asm volatile("rstorssp (%eax)"); + asm volatile("rstorssp (0x12345678)"); + asm volatile("rstorssp 0x12345678(%eax,%ecx,8)"); + + /* wrssd */ + + asm volatile("wrssd %ecx,(%eax)"); + asm volatile("wrssd %edx,(0x12345678)"); + asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)"); + + /* wrussd */ + + asm volatile("wrussd %ecx,(%eax)"); + asm volatile("wrussd %edx,(0x12345678)"); + asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)"); + + /* setssbsy */ + + asm volatile("setssbsy"); + /* Also check instructions in the same group encoding as setssbsy */ + asm volatile("rdpkru"); + asm volatile("wrpkru"); + + /* clrssbsy */ + + asm volatile("clrssbsy (%eax)"); + asm volatile("clrssbsy (0x12345678)"); + asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)"); + + /* endbr32/64 */ + + asm volatile("endbr32"); + asm volatile("endbr64"); + + /* call with/without notrack prefix */ + + asm volatile("call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + asm volatile("notrack bnd call *%eax"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(%eax)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *(0x12345678)"); /* Expecting: call indirect 0 */ + asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */ + + /* jmp with/without notrack prefix */ + + asm volatile("jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + + asm volatile("notrack bnd jmp *%eax"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */ + asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */ + #endif /* #ifndef __x86_64__ */ /* SGX */ diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 7abc9fd4cbec..3da506e13f49 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -7,13 +7,13 @@ #include <errno.h> #include <stdbool.h> -#include "../../util/header.h" -#include "../../util/debug.h" -#include "../../util/pmu.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/evlist.h" +#include "../../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/pmu.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/evlist.h" static struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index ac45015cc6ba..047dc00eafa6 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -3,12 +3,12 @@ #include <linux/string.h> #include <linux/zalloc.h> -#include "../../util/event.h" -#include "../../util/synthetic-events.h" -#include "../../util/machine.h" -#include "../../util/tool.h" -#include "../../util/map.h" -#include "../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index aa6deb463bf3..578c8c568ffd 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -7,8 +7,8 @@ #include <string.h> #include <regex.h> -#include "../../util/debug.h" -#include "../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/header.h" static inline void cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 27d9e214d068..09f93800bffd 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -11,18 +11,18 @@ #include <linux/log2.h> #include <linux/zalloc.h> -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/mmap.h" -#include "../../util/session.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/record.h" -#include "../../util/tsc.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-bts.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/record.h" +#include "../../../util/tsc.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-bts.h" #include <internal/lib.h> // page_size #define KiB(x) ((x) * 1024) @@ -413,20 +413,6 @@ out_err: return err; } -static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) -{ - struct intel_bts_recording *btsr = - container_of(itr, struct intel_bts_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(btsr->evlist, evsel) { - if (evsel->core.attr.type == btsr->intel_bts_pmu->type) - return perf_evlist__enable_event_idx(btsr->evlist, - evsel, idx); - } - return -EINVAL; -} - struct auxtrace_record *intel_bts_recording_init(int *err) { struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); @@ -447,6 +433,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err) } btsr->intel_bts_pmu = intel_bts_pmu; + btsr->itr.pmu = intel_bts_pmu; btsr->itr.recording_options = intel_bts_recording_options; btsr->itr.info_priv_size = intel_bts_info_priv_size; btsr->itr.info_fill = intel_bts_info_fill; @@ -456,7 +443,7 @@ struct auxtrace_record *intel_bts_recording_init(int *err) btsr->itr.find_snapshot = intel_bts_find_snapshot; btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; btsr->itr.reference = intel_bts_reference; - btsr->itr.read_finish = intel_bts_read_finish; + btsr->itr.read_finish = auxtrace_record__read_finish; btsr->itr.alignment = sizeof(struct branch); return &btsr->itr; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 20df442fdf36..1643aed8c4c8 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -13,23 +13,23 @@ #include <linux/zalloc.h> #include <cpuid.h> -#include "../../util/session.h" -#include "../../util/event.h" -#include "../../util/evlist.h" -#include "../../util/evsel.h" -#include "../../util/evsel_config.h" -#include "../../util/cpumap.h" -#include "../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/evsel_config.h" +#include "../../../util/cpumap.h" +#include "../../../util/mmap.h" #include <subcmd/parse-options.h> -#include "../../util/parse-events.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/target.h" -#include "../../util/tsc.h" +#include "../../../util/parse-events.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/target.h" +#include "../../../util/tsc.h" #include <internal/lib.h> // page_size -#include "../../util/intel-pt.h" +#include "../../../util/intel-pt.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -1166,20 +1166,6 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) return rdtsc(); } -static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) -{ - struct intel_pt_recording *ptr = - container_of(itr, struct intel_pt_recording, itr); - struct evsel *evsel; - - evlist__for_each_entry(ptr->evlist, evsel) { - if (evsel->core.attr.type == ptr->intel_pt_pmu->type) - return perf_evlist__enable_event_idx(ptr->evlist, evsel, - idx); - } - return -EINVAL; -} - struct auxtrace_record *intel_pt_recording_init(int *err) { struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); @@ -1200,6 +1186,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err) } ptr->intel_pt_pmu = intel_pt_pmu; + ptr->itr.pmu = intel_pt_pmu; ptr->itr.recording_options = intel_pt_recording_options; ptr->itr.info_priv_size = intel_pt_info_priv_size; ptr->itr.info_fill = intel_pt_info_fill; @@ -1209,7 +1196,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err) ptr->itr.find_snapshot = intel_pt_find_snapshot; ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; ptr->itr.reference = intel_pt_reference; - ptr->itr.read_finish = intel_pt_read_finish; + ptr->itr.read_finish = auxtrace_record__read_finish; /* * Decoding starts at a PSB packet. Minimum PSB period is 2K so 4K * should give at least 1 PSB per sample. diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index e17e080e76f4..31679c35d493 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -5,9 +5,9 @@ #include <stdlib.h> #include <internal/lib.h> // page_size -#include "../../util/machine.h" -#include "../../util/map.h" -#include "../../util/symbol.h" +#include "../../../util/machine.h" +#include "../../../util/map.h" +#include "../../../util/symbol.h" #include <linux/ctype.h> #include <symbol/kallsyms.h> diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index c218b83e063b..fca81b39b09f 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -5,10 +5,10 @@ #include <linux/kernel.h> #include <linux/zalloc.h> -#include "../../perf-sys.h" -#include "../../util/perf_regs.h" -#include "../../util/debug.h" -#include "../../util/event.h" +#include "../../../perf-sys.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index e33ef5bc31c5..d48d608517fd 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -4,9 +4,9 @@ #include <linux/stddef.h> #include <linux/perf_event.h> -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/pmu.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/pmu.h" struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index fddb3ced9db6..4aa6de1aa67d 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -2,6 +2,10 @@ #ifndef BENCH_H #define BENCH_H +#include <sys/time.h> + +extern struct timeval bench__start, bench__end, bench__runtime; + /* * The madvise transparent hugepage constants were added in glibc * 2.13. For compatibility with older versions of glibc, define these diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index bb617e568841..cadc18d42aa4 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -35,7 +35,6 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 8; -struct timeval start, end, runtime; static bool done, __verbose, randomize; /* @@ -94,8 +93,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void nest_epollfd(void) @@ -313,6 +312,7 @@ int bench_epoll_ctl(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); @@ -361,7 +361,7 @@ int bench_epoll_ctl(int argc, const char **argv) threads_starting = nthreads; - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); do_threads(worker, cpu); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 7af694437f4e..f938c585d512 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -90,7 +90,6 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 8; -struct timeval start, end, runtime; static bool wdone, done, __verbose, randomize, nonblocking; /* @@ -276,8 +275,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void print_summary(void) @@ -287,7 +286,7 @@ static void print_summary(void) printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", avg, rel_stddev_stats(stddev, avg), - (int) runtime.tv_sec); + (int)bench__runtime.tv_sec); } static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) @@ -427,6 +426,7 @@ int bench_epoll_wait(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); @@ -479,7 +479,7 @@ int bench_epoll_wait(int argc, const char **argv) threads_starting = nthreads; - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); do_threads(worker, cpu); @@ -519,7 +519,7 @@ int bench_epoll_wait(int argc, const char **argv) qsort(worker, nthreads, sizeof(struct worker), cmpworker); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops/runtime.tv_sec; + unsigned long t = worker[i].ops / bench__runtime.tv_sec; update_stats(&throughput_stats, t); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 8ba0c3330a9a..65eebe06c04d 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -37,7 +37,7 @@ static unsigned int nfutexes = 1024; static bool fshared = false, done = false, silent = false; static int futex_flag = 0; -struct timeval start, end, runtime; +struct timeval bench__start, bench__end, bench__runtime; static pthread_mutex_t thread_lock; static unsigned int threads_starting; static struct stats throughput_stats; @@ -103,8 +103,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void print_summary(void) @@ -114,7 +114,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), - (int) runtime.tv_sec); + (int)bench__runtime.tv_sec); } int bench_futex_hash(int argc, const char **argv) @@ -137,6 +137,7 @@ int bench_futex_hash(int argc, const char **argv) if (!cpu) goto errmem; + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); @@ -161,7 +162,7 @@ int bench_futex_hash(int argc, const char **argv) threads_starting = nthreads; pthread_attr_init(&thread_attr); - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); for (i = 0; i < nthreads; i++) { worker[i].tid = i; worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); @@ -204,7 +205,7 @@ int bench_futex_hash(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops/runtime.tv_sec; + unsigned long t = worker[i].ops / bench__runtime.tv_sec; update_stats(&throughput_stats, t); if (!silent) { if (nfutexes == 1) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index d0cae8125423..89fd8f325f38 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -37,7 +37,6 @@ static bool silent = false, multi = false; static bool done = false, fshared = false; static unsigned int nthreads = 0; static int futex_flag = 0; -struct timeval start, end, runtime; static pthread_mutex_t thread_lock; static unsigned int threads_starting; static struct stats throughput_stats; @@ -64,7 +63,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), - (int) runtime.tv_sec); + (int)bench__runtime.tv_sec); } static void toggle_done(int sig __maybe_unused, @@ -73,8 +72,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void *workerfn(void *arg) @@ -161,6 +160,7 @@ int bench_futex_lock_pi(int argc, const char **argv) if (!cpu) err(EXIT_FAILURE, "calloc"); + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); @@ -185,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv) threads_starting = nthreads; pthread_attr_init(&thread_attr); - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); create_threads(worker, thread_attr, cpu); pthread_attr_destroy(&thread_attr); @@ -211,7 +211,7 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops/runtime.tv_sec; + unsigned long t = worker[i].ops / bench__runtime.tv_sec; update_stats(&throughput_stats, t); if (!silent) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index a00a6891447a..7a15c2e61022 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -128,6 +128,7 @@ int bench_futex_requeue(int argc, const char **argv) if (!cpu) err(EXIT_FAILURE, "cpu_map__new"); + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index a053cf2b7039..cd2b81a845ac 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -234,6 +234,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index df810096abfe..2dfcef3e371e 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -43,7 +43,7 @@ static bool done = false, silent = false, fshared = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats waketime_stats, wakeup_stats; -static unsigned int ncpus, threads_starting, nthreads = 0; +static unsigned int threads_starting, nthreads = 0; static int futex_flag = 0; static const struct option options[] = { @@ -136,12 +136,13 @@ int bench_futex_wake(int argc, const char **argv) if (!cpu) err(EXIT_FAILURE, "calloc"); + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); if (!nthreads) - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5898662bc8fb..6c0a0412502e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -535,6 +535,10 @@ int cmd_annotate(int argc, const char **argv) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING(0, "prefix", &annotate.opts.prefix, "prefix", + "Add prefix to source file path names in programs (with --prefix-strip)"), + OPT_STRING(0, "prefix-strip", &annotate.opts.prefix_strip, "N", + "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, @@ -562,6 +566,8 @@ int cmd_annotate(int argc, const char **argv) if (ret < 0) return ret; + annotation_config__init(&annotate.opts); + argc = parse_options(argc, argv, options, annotate_usage, 0); if (argc) { /* @@ -574,6 +580,9 @@ int cmd_annotate(int argc, const char **argv) annotate.sym_hist_filter = argv[0]; } + if (annotate_check_args(&annotate.opts) < 0) + return -EINVAL; + if (symbol_conf.show_nr_samples && annotate.use_gtk) { pr_err("--show-nr-samples is not available in --gtk mode at this time\n"); return ret; @@ -598,8 +607,6 @@ int cmd_annotate(int argc, const char **argv) if (ret < 0) goto out_delete; - annotation_config__init(); - symbol_conf.try_vmlinux_path = true; ret = symbol__init(&annotate.session->header.env); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index e69f44941aad..246ac0b4d54f 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -595,8 +595,8 @@ tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, { struct c2c_hist_entry *c2c_left; struct c2c_hist_entry *c2c_right; - unsigned int tot_hitm_left; - unsigned int tot_hitm_right; + uint64_t tot_hitm_left; + uint64_t tot_hitm_right; c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); @@ -629,7 +629,8 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ \ c2c_left = container_of(left, struct c2c_hist_entry, he); \ c2c_right = container_of(right, struct c2c_hist_entry, he); \ - return c2c_left->stats.__f - c2c_right->stats.__f; \ + return (uint64_t) c2c_left->stats.__f - \ + (uint64_t) c2c_right->stats.__f; \ } #define STAT_FN(__f) \ @@ -682,7 +683,8 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); - return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); + return (uint64_t) llc_miss(&c2c_left->stats) - + (uint64_t) llc_miss(&c2c_right->stats); } static uint64_t total_records(struct c2c_stats *stats) @@ -2384,7 +2386,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "? - help", true); + key = hist_browser__run(browser, "? - help", true, 0); switch (key) { case 's': @@ -2453,7 +2455,7 @@ static int perf_c2c__hists_browse(struct hists *hists) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "? - help", true); + key = hist_browser__run(browser, "? - help", true, 0); switch (key) { case 'q': diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f8b6ae557d8b..5e697cd2224a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -572,29 +572,12 @@ static void init_block_hist(struct block_hist *bh) bh->valid = true; } -static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b) -{ - struct block_info *bi_a = a->block_info; - struct block_info *bi_b = b->block_info; - int cmp; - - if (!bi_a->sym || !bi_b->sym) - return -1; - - cmp = strcmp(bi_a->sym->name, bi_b->sym->name); - - if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end)) - return 0; - - return -1; -} - static struct hist_entry *get_block_pair(struct hist_entry *he, struct hists *hists_pair) { struct rb_root_cached *root = hists_pair->entries_in; struct rb_node *next = rb_first_cached(root); - int cmp; + int64_t cmp; while (next != NULL) { struct hist_entry *he_pair = rb_entry(next, struct hist_entry, @@ -602,7 +585,7 @@ static struct hist_entry *get_block_pair(struct hist_entry *he, next = rb_next(&he_pair->rb_node_in); - cmp = block_pair_cmp(he_pair, he); + cmp = __block_info__cmp(he_pair, he); if (!cmp) return he_pair; } @@ -1312,7 +1295,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", start_line, end_line, block_he->diff.cycles); } else { diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 26bc5923e6b5..70548df2abb9 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -449,7 +449,8 @@ static int perf_del_probe_events(struct strfilter *filter) ret = probe_file__del_strlist(kfd, klist); if (ret < 0) goto error; - } + } else if (ret == -ENOMEM) + goto error; ret2 = probe_file__get_events(ufd, filter, ulist); if (ret2 == 0) { @@ -459,7 +460,8 @@ static int perf_del_probe_events(struct strfilter *filter) ret2 = probe_file__del_strlist(ufd, ulist); if (ret2 < 0) goto error; - } + } else if (ret2 == -ENOMEM) + goto error; if (ret == -ENOENT && ret2 == -ENOENT) pr_warning("\"%s\" does not hit any event.\n", str); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fb19ef63cc35..4c301466101b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -62,6 +62,7 @@ #include <linux/string.h> #include <linux/time64.h> #include <linux/zalloc.h> +#include <linux/bitmap.h> struct switch_output { bool enabled; @@ -93,7 +94,7 @@ struct record { bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; - cpu_set_t affinity_mask; + struct mmap_cpu_mask affinity_mask; unsigned long output_max_size; /* = 0: unlimited */ }; @@ -961,10 +962,15 @@ static struct perf_event_header finished_round_event = { static void record__adjust_affinity(struct record *rec, struct mmap *map) { if (rec->opts.affinity != PERF_AFFINITY_SYS && - !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { - CPU_ZERO(&rec->affinity_mask); - CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); - sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); + !bitmap_equal(rec->affinity_mask.bits, map->affinity_mask.bits, + rec->affinity_mask.nbits)) { + bitmap_zero(rec->affinity_mask.bits, rec->affinity_mask.nbits); + bitmap_or(rec->affinity_mask.bits, rec->affinity_mask.bits, + map->affinity_mask.bits, rec->affinity_mask.nbits); + sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&rec->affinity_mask), + (cpu_set_t *)rec->affinity_mask.bits); + if (verbose == 2) + mmap_cpu_mask__scnprintf(&rec->affinity_mask, "thread"); } } @@ -2433,7 +2439,6 @@ int cmd_record(int argc, const char **argv) # undef REASON #endif - CPU_ZERO(&rec->affinity_mask); rec->opts.affinity = PERF_AFFINITY_SYS; rec->evlist = evlist__new(); @@ -2499,6 +2504,16 @@ int cmd_record(int argc, const char **argv) symbol__init(NULL); + if (rec->opts.affinity != PERF_AFFINITY_SYS) { + rec->affinity_mask.nbits = cpu__max_cpu(); + rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits); + if (!rec->affinity_mask.bits) { + pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); + return -ENOMEM; + } + pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits); + } + err = record__auxtrace_init(rec); if (err) goto out; @@ -2613,6 +2628,7 @@ int cmd_record(int argc, const char **argv) err = __cmd_record(&record, argc, argv); out: + bitmap_free(rec->affinity_mask.bits); evlist__delete(rec->evlist); symbol__exit(); auxtrace_record__free(rec->itr); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index de988589d99b..5f4045df76f4 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -104,6 +104,7 @@ struct report { bool symbol_ipc; bool total_cycles_mode; struct block_report *block_reports; + int nr_block_reports; }; static int report__config(const char *var, const char *value, void *cb) @@ -185,24 +186,23 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, { struct hist_entry *he = iter->he; struct report *rep = arg; - struct branch_info *bi; + struct branch_info *bi = he->branch_info; struct perf_sample *sample = iter->sample; struct evsel *evsel = iter->evsel; int err; + branch_type_count(&rep->brtype_stat, &bi->flags, + bi->from.addr, bi->to.addr); + if (!ui__has_annotation() && !rep->symbol_ipc) return 0; - bi = he->branch_info; err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); - branch_type_count(&rep->brtype_stat, &bi->flags, - bi->from.addr, bi->to.addr); - out: return err; } @@ -412,10 +412,10 @@ static int report__setup_sample_type(struct report *rep) PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; -#ifndef HAVE_LIBUNWIND_SUPPORT +#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) if (dwarf_callchain_users) { - ui__warning("Please install libunwind development packages " - "during the perf build.\n"); + ui__warning("Please install libunwind or libdw " + "development packages during the perf build.\n"); } #endif @@ -966,8 +966,19 @@ static int __cmd_report(struct report *rep) report__output_resort(rep); if (rep->total_cycles_mode) { + int block_hpps[6] = { + PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT, + PERF_HPP_REPORT__BLOCK_LBR_CYCLES, + PERF_HPP_REPORT__BLOCK_CYCLES_PCT, + PERF_HPP_REPORT__BLOCK_AVG_CYCLES, + PERF_HPP_REPORT__BLOCK_RANGE, + PERF_HPP_REPORT__BLOCK_DSO, + }; + rep->block_reports = block_info__create_report(session->evlist, - rep->total_cycles); + rep->total_cycles, + block_hpps, 6, + &rep->nr_block_reports); if (!rep->block_reports) return -1; } @@ -1164,7 +1175,8 @@ int cmd_report(int argc, const char **argv) report_callchain_help, &report_parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, - "Accumulate callchains of children and show total overhead as well"), + "Accumulate callchains of children and show total overhead as well. " + "Enabled by default, use --no-children to disable."), OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " @@ -1207,6 +1219,10 @@ int cmd_report(int argc, const char **argv) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING(0, "prefix", &report.annotation_opts.prefix, "prefix", + "Add prefix to source file path names in programs (with --prefix-strip)"), + OPT_STRING(0, "prefix-strip", &report.annotation_opts.prefix_strip, "N", + "Strip first N entries of source file path name in programs (with --prefix)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set, @@ -1286,6 +1302,9 @@ int cmd_report(int argc, const char **argv) report.symbol_filter_str = argv[0]; } + if (annotate_check_args(&report.annotation_opts) < 0) + return -EINVAL; + if (report.mmaps_mode) report.tasks_mode = true; @@ -1499,7 +1518,7 @@ repeat: symbol_conf.priv_size += sizeof(u32); symbol_conf.sort_by_name = true; } - annotation_config__init(); + annotation_config__init(&report.annotation_opts); } if (symbol__init(&session->header.env) < 0) @@ -1543,8 +1562,11 @@ error: zfree(&report.ptime_range); } - if (report.block_reports) - zfree(&report.block_reports); + if (report.block_reports) { + block_info__free_report(report.block_reports, + report.nr_block_reports); + report.block_reports = NULL; + } zstd_fini(&(session->zstd_data)); perf_session__delete(session); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 8a12d71364c3..82fcc2c15fe4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -51,6 +51,9 @@ #define SYM_LEN 129 #define MAX_PID 1024000 +static const char *cpu_list; +static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); + struct sched_atom; struct task_desc { @@ -2008,6 +2011,9 @@ static void timehist_print_sample(struct perf_sched *sched, char nstr[30]; u64 wait_time; + if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) + return; + timestamp__scnprintf_usec(t, tstr, sizeof(tstr)); printf("%15s [%04d] ", tstr, sample->cpu); @@ -2994,6 +3000,12 @@ static int perf_sched__timehist(struct perf_sched *sched) if (IS_ERR(session)) return PTR_ERR(session); + if (cpu_list) { + err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); + if (err < 0) + goto out; + } + evlist = session->evlist; symbol__init(&session->header.env); @@ -3429,6 +3441,7 @@ int cmd_sched(int argc, const char **argv) "analyze events only for given process id(s)"), OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", "analyze events only for given thread id(s)"), + OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_PARENT(sched_options) }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e2406b291c1c..656b347f6dd8 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -735,6 +735,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -743,8 +744,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, return 0; for (i = 0; i < br->nr; i++) { - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (PRINT_FIELD(DSO)) { memset(&alf, 0, sizeof(alf)); @@ -768,10 +769,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -782,6 +783,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -793,8 +795,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; thread__find_symbol_fb(thread, sample->cpumode, from, &alf); thread__find_symbol_fb(thread, sample->cpumode, to, &alt); @@ -813,10 +815,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str( br->entries + i), - br->entries[i].flags.in_tx? 'X' : '-', - br->entries[i].flags.abort? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -827,6 +829,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, struct perf_event_attr *attr, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct addr_location alf, alt; u64 i, from, to; int printed = 0; @@ -838,8 +841,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - from = br->entries[i].from; - to = br->entries[i].to; + from = entries[i].from; + to = entries[i].to; if (thread__find_map_fb(thread, sample->cpumode, from, &alf) && !alf.map->dso->adjust_symbols) @@ -862,10 +865,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, printed += fprintf(fp, ")"); } printed += fprintf(fp, "/%c/%c/%c/%d ", - mispred_str(br->entries + i), - br->entries[i].flags.in_tx ? 'X' : '-', - br->entries[i].flags.abort ? 'A' : '-', - br->entries[i].flags.cycles); + mispred_str(entries + i), + entries[i].flags.in_tx ? 'X' : '-', + entries[i].flags.abort ? 'A' : '-', + entries[i].flags.cycles); } return printed; @@ -1053,6 +1056,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, struct machine *machine, FILE *fp) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 start, end; int i, insn, len, nr, ilen, printed = 0; struct perf_insn x; @@ -1073,31 +1077,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += fprintf(fp, "%c", '\n'); /* Handle first from jump, of which we don't know the entry. */ - len = grab_bb(buffer, br->entries[nr-1].from, - br->entries[nr-1].from, + len = grab_bb(buffer, entries[nr-1].from, + entries[nr-1].from, machine, thread, &x.is64bit, &x.cpumode, false); if (len > 0) { - printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, + printed += ip__fprintf_sym(entries[nr - 1].from, thread, x.cpumode, x.cpu, &lastsym, attr, fp); - printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], + printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1], &x, buffer, len, 0, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) - printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from); + printed += print_srccode(thread, x.cpumode, entries[nr - 1].from); } /* Print all blocks */ for (i = nr - 2; i >= 0; i--) { - if (br->entries[i].from || br->entries[i].to) + if (entries[i].from || entries[i].to) pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i, - br->entries[i].from, - br->entries[i].to); - start = br->entries[i + 1].to; - end = br->entries[i].from; + entries[i].from, + entries[i].to); + start = entries[i + 1].to; + end = entries[i].from; len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); /* Patch up missing kernel transfers due to ring filters */ if (len == -ENXIO && i > 0) { - end = br->entries[--i].from; + end = entries[--i].from; pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end); len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false); } @@ -1110,7 +1114,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp, + printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp, &total_cycles); if (PRINT_FIELD(SRCCODE)) printed += print_srccode(thread, x.cpumode, ip); @@ -1134,9 +1138,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * Hit the branch? In this case we are already done, and the target * has not been executed yet. */ - if (br->entries[0].from == sample->ip) + if (entries[0].from == sample->ip) goto out; - if (br->entries[0].flags.abort) + if (entries[0].flags.abort) goto out; /* @@ -1147,7 +1151,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, * between final branch and sample. When this happens just * continue walking after the last TO until we hit a branch. */ - start = br->entries[0].to; + start = entries[0].to; end = sample->ip; if (end < start) { /* Missing jump. Scan 128 bytes for the next branch */ diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a098c2ebf4ea..ec053dc1e35c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -929,6 +929,10 @@ static struct option stat_options[] = { OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user, "Configure all used events to run in user space.", PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread, + "Use with 'percore' event qualifier to show the event " + "counts of one hardware thread by sum up total hardware " + "threads of same physical core"), OPT_END() }; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 795e353de095..d2539b793f9d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -143,7 +143,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(&he->ms, evsel, 0, &top->annotation_opts, NULL); + err = symbol__annotate(&he->ms, evsel, &top->annotation_opts, NULL); if (err == 0) { top->sym_filter_entry = he; } else { @@ -684,7 +684,9 @@ repeat: delay_msecs = top->delay_secs * MSEC_PER_SEC; set_term_quiet_input(&save); /* trash return*/ - getc(stdin); + clearerr(stdin); + if (poll(&stdin_poll, 1, 0) > 0) + getc(stdin); while (!done) { perf_top__print_sym_table(top); @@ -1512,6 +1514,10 @@ int cmd_top(int argc, const char **argv) "objdump binary to use for disassembly and annotations"), OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING(0, "prefix", &top.annotation_opts.prefix, "prefix", + "Add prefix to source file path names in programs (with --prefix-strip)"), + OPT_STRING(0, "prefix-strip", &top.annotation_opts.prefix_strip, "N", + "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", "Don't show entries under that percent", parse_percent_limit), @@ -1582,6 +1588,9 @@ int cmd_top(int argc, const char **argv) if (argc) usage_with_options(top_usage, options); + if (annotate_check_args(&top.annotation_opts) < 0) + goto out_delete_evlist; + if (!top.evlist->core.nr_entries && perf_evlist__add_default(top.evlist) < 0) { pr_err("Not enough memory for event selector list\n"); @@ -1676,7 +1685,7 @@ int cmd_top(int argc, const char **argv) if (status < 0) goto out_delete_evlist; - annotation_config__init(); + annotation_config__init(&top.annotation_opts); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); status = symbol__init(NULL); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 46a72ecac427..01d542007c8b 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1065,7 +1065,9 @@ static struct syscall_fmt syscall_fmts[] = { { .name = "poll", .timeout = true, }, { .name = "ppoll", .timeout = true, }, { .name = "prctl", - .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ }, + .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ + .strtoul = STUL_STRARRAY, + .parm = &strarray__prctl_options, }, [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ }, [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, }, { .name = "pread", .alias = "pread64", }, diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 68039a96c1dc..bfb21d049e6c 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -13,6 +13,7 @@ include/uapi/linux/kcmp.h include/uapi/linux/kvm.h include/uapi/linux/in.h include/uapi/linux/mount.h +include/uapi/linux/openat2.h include/uapi/linux/perf_event.h include/uapi/linux/prctl.h include/uapi/linux/sched.h diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index b9c203219691..65c4ff6892d9 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,11 +39,13 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com> */ -#include <bpf.h> +#include <bpf/bpf.h> -int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec) +#define NSEC_PER_SEC 1000000000L + +int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec) { - return sec == 5; + return sec / NSEC_PER_SEC == 5ULL; } license(GPL); diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c index 3776d26db9e7..7d7fb0c9fe76 100644 --- a/tools/perf/examples/bpf/empty.c +++ b/tools/perf/examples/bpf/empty.c @@ -1,3 +1,3 @@ -#include <bpf.h> +#include <bpf/bpf.h> license(GPL); diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c index 9cd124b09392..c4481c390d23 100644 --- a/tools/perf/examples/bpf/sys_enter_openat.c +++ b/tools/perf/examples/bpf/sys_enter_openat.c @@ -14,7 +14,7 @@ * the return value. */ -#include <bpf.h> +#include <bpf/bpf.h> struct syscall_enter_openat_args { unsigned long long unused; diff --git a/tools/perf/lib/Build b/tools/perf/lib/Build deleted file mode 100644 index 2ef9a4ec6d99..000000000000 --- a/tools/perf/lib/Build +++ /dev/null @@ -1,13 +0,0 @@ -libperf-y += core.o -libperf-y += cpumap.o -libperf-y += threadmap.o -libperf-y += evsel.o -libperf-y += evlist.o -libperf-y += mmap.o -libperf-y += zalloc.o -libperf-y += xyarray.o -libperf-y += lib.o - -$(OUTPUT)zalloc.o: ../../lib/zalloc.c FORCE - $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/lib/Documentation/Makefile b/tools/perf/lib/Documentation/Makefile deleted file mode 100644 index 586425a88795..000000000000 --- a/tools/perf/lib/Documentation/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -all: - rst2man man/libperf.rst > man/libperf.7 - rst2pdf tutorial/tutorial.rst - -clean: - rm -f man/libperf.7 - rm -f tutorial/tutorial.pdf diff --git a/tools/perf/lib/Documentation/man/libperf.rst b/tools/perf/lib/Documentation/man/libperf.rst deleted file mode 100644 index 09a270fccb9c..000000000000 --- a/tools/perf/lib/Documentation/man/libperf.rst +++ /dev/null @@ -1,100 +0,0 @@ -.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -libperf - -The libperf library provides an API to access the linux kernel perf -events subsystem. It provides the following high level objects: - - - struct perf_cpu_map - - struct perf_thread_map - - struct perf_evlist - - struct perf_evsel - -reference -========= -Function reference by header files: - -perf/core.h ------------ -.. code-block:: c - - typedef int (\*libperf_print_fn_t)(enum libperf_print_level level, - const char \*, va_list ap); - - void libperf_set_print(libperf_print_fn_t fn); - -perf/cpumap.h -------------- -.. code-block:: c - - struct perf_cpu_map \*perf_cpu_map__dummy_new(void); - struct perf_cpu_map \*perf_cpu_map__new(const char \*cpu_list); - struct perf_cpu_map \*perf_cpu_map__read(FILE \*file); - struct perf_cpu_map \*perf_cpu_map__get(struct perf_cpu_map \*map); - void perf_cpu_map__put(struct perf_cpu_map \*map); - int perf_cpu_map__cpu(const struct perf_cpu_map \*cpus, int idx); - int perf_cpu_map__nr(const struct perf_cpu_map \*cpus); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) - -perf/threadmap.h ----------------- -.. code-block:: c - - struct perf_thread_map \*perf_thread_map__new_dummy(void); - void perf_thread_map__set_pid(struct perf_thread_map \*map, int thread, pid_t pid); - char \*perf_thread_map__comm(struct perf_thread_map \*map, int thread); - struct perf_thread_map \*perf_thread_map__get(struct perf_thread_map \*map); - void perf_thread_map__put(struct perf_thread_map \*map); - -perf/evlist.h -------------- -.. code-block:: - - void perf_evlist__init(struct perf_evlist \*evlist); - void perf_evlist__add(struct perf_evlist \*evlist, - struct perf_evsel \*evsel); - void perf_evlist__remove(struct perf_evlist \*evlist, - struct perf_evsel \*evsel); - struct perf_evlist \*perf_evlist__new(void); - void perf_evlist__delete(struct perf_evlist \*evlist); - struct perf_evsel\* perf_evlist__next(struct perf_evlist \*evlist, - struct perf_evsel \*evsel); - int perf_evlist__open(struct perf_evlist \*evlist); - void perf_evlist__close(struct perf_evlist \*evlist); - void perf_evlist__enable(struct perf_evlist \*evlist); - void perf_evlist__disable(struct perf_evlist \*evlist); - perf_evlist__for_each_evsel(evlist, pos) - void perf_evlist__set_maps(struct perf_evlist \*evlist, - struct perf_cpu_map \*cpus, - struct perf_thread_map \*threads); - -perf/evsel.h ------------- -.. code-block:: c - - struct perf_counts_values { - union { - struct { - uint64_t val; - uint64_t ena; - uint64_t run; - }; - uint64_t values[3]; - }; - }; - - void perf_evsel__init(struct perf_evsel \*evsel, - struct perf_event_attr \*attr); - struct perf_evsel \*perf_evsel__new(struct perf_event_attr \*attr); - void perf_evsel__delete(struct perf_evsel \*evsel); - int perf_evsel__open(struct perf_evsel \*evsel, struct perf_cpu_map \*cpus, - struct perf_thread_map \*threads); - void perf_evsel__close(struct perf_evsel \*evsel); - int perf_evsel__read(struct perf_evsel \*evsel, int cpu, int thread, - struct perf_counts_values \*count); - int perf_evsel__enable(struct perf_evsel \*evsel); - int perf_evsel__disable(struct perf_evsel \*evsel); - int perf_evsel__apply_filter(struct perf_evsel \*evsel, const char \*filter); - struct perf_cpu_map \*perf_evsel__cpus(struct perf_evsel \*evsel); - struct perf_thread_map \*perf_evsel__threads(struct perf_evsel \*evsel); - struct perf_event_attr \*perf_evsel__attr(struct perf_evsel \*evsel); diff --git a/tools/perf/lib/Documentation/tutorial/tutorial.rst b/tools/perf/lib/Documentation/tutorial/tutorial.rst deleted file mode 100644 index 7be7bc27b385..000000000000 --- a/tools/perf/lib/Documentation/tutorial/tutorial.rst +++ /dev/null @@ -1,123 +0,0 @@ -.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -libperf tutorial -================ - -Compile and install libperf from kernel sources -=============================================== -.. code-block:: bash - - git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git - cd linux/tools/perf/lib - make - sudo make install prefix=/usr - -Libperf object -============== -The libperf library provides several high level objects: - -struct perf_cpu_map - Provides a cpu list abstraction. - -struct perf_thread_map - Provides a thread list abstraction. - -struct perf_evsel - Provides an abstraction for single a perf event. - -struct perf_evlist - Gathers several struct perf_evsel object and performs functions on all of them. - -The exported API binds these objects together, -for full reference see the libperf.7 man page. - -Examples -======== -Examples aim to explain libperf functionality on simple use cases. -They are based in on a checked out linux kernel git tree: - -.. code-block:: bash - - $ cd tools/perf/lib/Documentation/tutorial/ - $ ls -d ex-* - ex-1-compile ex-2-evsel-stat ex-3-evlist-stat - -ex-1-compile example -==================== -This example shows the basic usage of *struct perf_cpu_map*, -how to create it and display its cpus: - -.. code-block:: bash - - $ cd ex-1-compile/ - $ make - gcc -o test test.c -lperf - $ ./test - 0 1 2 3 4 5 6 7 - - -The full code listing is here: - -.. code-block:: c - - 1 #include <perf/cpumap.h> - 2 - 3 int main(int argc, char **Argv) - 4 { - 5 struct perf_cpu_map *cpus; - 6 int cpu, tmp; - 7 - 8 cpus = perf_cpu_map__new(NULL); - 9 - 10 perf_cpu_map__for_each_cpu(cpu, tmp, cpus) - 11 fprintf(stdout, "%d ", cpu); - 12 - 13 fprintf(stdout, "\n"); - 14 - 15 perf_cpu_map__put(cpus); - 16 return 0; - 17 } - - -First you need to include the proper header to have *struct perf_cpumap* -declaration and functions: - -.. code-block:: c - - 1 #include <perf/cpumap.h> - - -The *struct perf_cpumap* object is created by *perf_cpu_map__new* call. -The *NULL* argument asks it to populate the object with the current online CPUs list: - -.. code-block:: c - - 8 cpus = perf_cpu_map__new(NULL); - -This is paired with a *perf_cpu_map__put*, that drops its reference at the end, possibly deleting it. - -.. code-block:: c - - 15 perf_cpu_map__put(cpus); - -The iteration through the *struct perf_cpumap* CPUs is done using the *perf_cpu_map__for_each_cpu* -macro which requires 3 arguments: - -- cpu - the cpu numer -- tmp - iteration helper variable -- cpus - the *struct perf_cpumap* object - -.. code-block:: c - - 10 perf_cpu_map__for_each_cpu(cpu, tmp, cpus) - 11 fprintf(stdout, "%d ", cpu); - -ex-2-evsel-stat example -======================= - -TBD - -ex-3-evlist-stat example -======================== - -TBD diff --git a/tools/perf/lib/Makefile b/tools/perf/lib/Makefile deleted file mode 100644 index 0f233638ef1f..000000000000 --- a/tools/perf/lib/Makefile +++ /dev/null @@ -1,188 +0,0 @@ -# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) -# Most of this file is copied from tools/lib/bpf/Makefile - -LIBPERF_VERSION = 0 -LIBPERF_PATCHLEVEL = 0 -LIBPERF_EXTRAVERSION = 1 - -MAKEFLAGS += --no-print-directory - -ifeq ($(srctree),) -srctree := $(patsubst %/,%,$(dir $(CURDIR))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -srctree := $(patsubst %/,%,$(dir $(srctree))) -#$(info Determined 'srctree' to be $(srctree)) -endif - -INSTALL = install - -# Use DESTDIR for installing into a different root directory. -# This is useful for building a package. The program will be -# installed in this directory as if it was the root directory. -# Then the build tool can move it later. -DESTDIR ?= -DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' - -include $(srctree)/tools/scripts/Makefile.include -include $(srctree)/tools/scripts/Makefile.arch - -ifeq ($(LP64), 1) - libdir_relative = lib64 -else - libdir_relative = lib -endif - -prefix ?= -libdir = $(prefix)/$(libdir_relative) - -# Shell quotes -libdir_SQ = $(subst ','\'',$(libdir)) -libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) - -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - -INCLUDES = \ --I$(srctree)/tools/perf/lib/include \ --I$(srctree)/tools/lib/ \ --I$(srctree)/tools/include \ --I$(srctree)/tools/arch/$(SRCARCH)/include/ \ --I$(srctree)/tools/arch/$(SRCARCH)/include/uapi \ --I$(srctree)/tools/include/uapi - -# Append required CFLAGS -override CFLAGS += $(EXTRA_WARNINGS) -override CFLAGS += -Werror -Wall -override CFLAGS += -fPIC -override CFLAGS += $(INCLUDES) -override CFLAGS += -fvisibility=hidden - -all: - -export srctree OUTPUT CC LD CFLAGS V -export DESTDIR DESTDIR_SQ - -include $(srctree)/tools/build/Makefile.include - -VERSION_SCRIPT := libperf.map - -PATCHLEVEL = $(LIBPERF_PATCHLEVEL) -EXTRAVERSION = $(LIBPERF_EXTRAVERSION) -VERSION = $(LIBPERF_VERSION).$(LIBPERF_PATCHLEVEL).$(LIBPERF_EXTRAVERSION) - -LIBPERF_SO := $(OUTPUT)libperf.so.$(VERSION) -LIBPERF_A := $(OUTPUT)libperf.a -LIBPERF_IN := $(OUTPUT)libperf-in.o -LIBPERF_PC := $(OUTPUT)libperf.pc - -LIBPERF_ALL := $(LIBPERF_A) $(OUTPUT)libperf.so* - -LIB_DIR := $(srctree)/tools/lib/api/ - -ifneq ($(OUTPUT),) -ifneq ($(subdir),) - API_PATH=$(OUTPUT)/../lib/api/ -else - API_PATH=$(OUTPUT) -endif -else - API_PATH=$(LIB_DIR) -endif - -LIBAPI = $(API_PATH)libapi.a -export LIBAPI - -$(LIBAPI): FORCE - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a - -$(LIBAPI)-clean: - $(call QUIET_CLEAN, libapi) - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null - -$(LIBPERF_IN): FORCE - $(Q)$(MAKE) $(build)=libperf - -$(LIBPERF_A): $(LIBPERF_IN) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) - -$(LIBPERF_SO): $(LIBPERF_IN) $(LIBAPI) - $(QUIET_LINK)$(CC) --shared -Wl,-soname,libperf.so \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ - @ln -sf $(@F) $(OUTPUT)libperf.so - @ln -sf $(@F) $(OUTPUT)libperf.so.$(LIBPERF_VERSION) - - -libs: $(LIBPERF_A) $(LIBPERF_SO) $(LIBPERF_PC) - -all: fixdep - $(Q)$(MAKE) libs - -clean: $(LIBAPI)-clean - $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ - *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd LIBPERF-CFLAGS $(LIBPERF_PC) - $(Q)$(MAKE) -C tests clean - -tests: libs - $(Q)$(MAKE) -C tests - $(Q)$(MAKE) -C tests run - -$(LIBPERF_PC): - $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ - -e "s|@LIBDIR@|$(libdir_SQ)|" \ - -e "s|@VERSION@|$(VERSION)|" \ - < libperf.pc.template > $@ - -define do_install_mkdir - if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ - fi -endef - -define do_install - if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ - fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' -endef - -install_lib: libs - $(call QUIET_INSTALL, $(LIBPERF_ALL)) \ - $(call do_install_mkdir,$(libdir_SQ)); \ - cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ) - -install_headers: - $(call QUIET_INSTALL, headers) \ - $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); - -install_pkgconfig: $(LIBPERF_PC) - $(call QUIET_INSTALL, $(LIBPERF_PC)) \ - $(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644) - -install: install_lib install_headers install_pkgconfig - -FORCE: - -.PHONY: all install clean tests FORCE diff --git a/tools/perf/lib/core.c b/tools/perf/lib/core.c deleted file mode 100644 index 58fc894b76c5..000000000000 --- a/tools/perf/lib/core.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#define __printf(a, b) __attribute__((format(printf, a, b))) - -#include <stdio.h> -#include <stdarg.h> -#include <unistd.h> -#include <linux/compiler.h> -#include <perf/core.h> -#include <internal/lib.h> -#include "internal.h" - -static int __base_pr(enum libperf_print_level level __maybe_unused, const char *format, - va_list args) -{ - return vfprintf(stderr, format, args); -} - -static libperf_print_fn_t __libperf_pr = __base_pr; - -__printf(2, 3) -void libperf_print(enum libperf_print_level level, const char *format, ...) -{ - va_list args; - - if (!__libperf_pr) - return; - - va_start(args, format); - __libperf_pr(level, format, args); - va_end(args); -} - -void libperf_init(libperf_print_fn_t fn) -{ - page_size = sysconf(_SC_PAGE_SIZE); - __libperf_pr = fn; -} diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c deleted file mode 100644 index f93f4e703e4c..000000000000 --- a/tools/perf/lib/cpumap.c +++ /dev/null @@ -1,345 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include <perf/cpumap.h> -#include <stdlib.h> -#include <linux/refcount.h> -#include <internal/cpumap.h> -#include <asm/bug.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <ctype.h> -#include <limits.h> - -struct perf_cpu_map *perf_cpu_map__dummy_new(void) -{ - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); - - if (cpus != NULL) { - cpus->nr = 1; - cpus->map[0] = -1; - refcount_set(&cpus->refcnt, 1); - } - - return cpus; -} - -static void cpu_map__delete(struct perf_cpu_map *map) -{ - if (map) { - WARN_ONCE(refcount_read(&map->refcnt) != 0, - "cpu_map refcnt unbalanced\n"); - free(map); - } -} - -struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map) -{ - if (map) - refcount_inc(&map->refcnt); - return map; -} - -void perf_cpu_map__put(struct perf_cpu_map *map) -{ - if (map && refcount_dec_and_test(&map->refcnt)) - cpu_map__delete(map); -} - -static struct perf_cpu_map *cpu_map__default_new(void) -{ - struct perf_cpu_map *cpus; - int nr_cpus; - - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - if (nr_cpus < 0) - return NULL; - - cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int)); - if (cpus != NULL) { - int i; - - for (i = 0; i < nr_cpus; ++i) - cpus->map[i] = i; - - cpus->nr = nr_cpus; - refcount_set(&cpus->refcnt, 1); - } - - return cpus; -} - -static int cmp_int(const void *a, const void *b) -{ - return *(const int *)a - *(const int*)b; -} - -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) -{ - size_t payload_size = nr_cpus * sizeof(int); - struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size); - int i, j; - - if (cpus != NULL) { - memcpy(cpus->map, tmp_cpus, payload_size); - qsort(cpus->map, nr_cpus, sizeof(int), cmp_int); - /* Remove dups */ - j = 0; - for (i = 0; i < nr_cpus; i++) { - if (i == 0 || cpus->map[i] != cpus->map[i - 1]) - cpus->map[j++] = cpus->map[i]; - } - cpus->nr = j; - assert(j <= nr_cpus); - refcount_set(&cpus->refcnt, 1); - } - - return cpus; -} - -struct perf_cpu_map *perf_cpu_map__read(FILE *file) -{ - struct perf_cpu_map *cpus = NULL; - int nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; - int max_entries = 0; - int n, cpu, prev; - char sep; - - sep = 0; - prev = -1; - for (;;) { - n = fscanf(file, "%u%c", &cpu, &sep); - if (n <= 0) - break; - if (prev >= 0) { - int new_max = nr_cpus + cpu - prev - 1; - - WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. " - "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS); - - if (new_max >= max_entries) { - max_entries = new_max + MAX_NR_CPUS / 2; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; - } - - while (++prev < cpu) - tmp_cpus[nr_cpus++] = prev; - } - if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); - if (tmp == NULL) - goto out_free_tmp; - tmp_cpus = tmp; - } - - tmp_cpus[nr_cpus++] = cpu; - if (n == 2 && sep == '-') - prev = cpu; - else - prev = -1; - if (n == 1 || sep == '\n') - break; - } - - if (nr_cpus > 0) - cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else - cpus = cpu_map__default_new(); -out_free_tmp: - free(tmp_cpus); - return cpus; -} - -static struct perf_cpu_map *cpu_map__read_all_cpu_map(void) -{ - struct perf_cpu_map *cpus = NULL; - FILE *onlnf; - - onlnf = fopen("/sys/devices/system/cpu/online", "r"); - if (!onlnf) - return cpu_map__default_new(); - - cpus = perf_cpu_map__read(onlnf); - fclose(onlnf); - return cpus; -} - -struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list) -{ - struct perf_cpu_map *cpus = NULL; - unsigned long start_cpu, end_cpu = 0; - char *p = NULL; - int i, nr_cpus = 0; - int *tmp_cpus = NULL, *tmp; - int max_entries = 0; - - if (!cpu_list) - return cpu_map__read_all_cpu_map(); - - /* - * must handle the case of empty cpumap to cover - * TOPOLOGY header for NUMA nodes with no CPU - * ( e.g., because of CPU hotplug) - */ - if (!isdigit(*cpu_list) && *cpu_list != '\0') - goto out; - - while (isdigit(*cpu_list)) { - p = NULL; - start_cpu = strtoul(cpu_list, &p, 0); - if (start_cpu >= INT_MAX - || (*p != '\0' && *p != ',' && *p != '-')) - goto invalid; - - if (*p == '-') { - cpu_list = ++p; - p = NULL; - end_cpu = strtoul(cpu_list, &p, 0); - - if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',')) - goto invalid; - - if (end_cpu < start_cpu) - goto invalid; - } else { - end_cpu = start_cpu; - } - - WARN_ONCE(end_cpu >= MAX_NR_CPUS, "Perf can support %d CPUs. " - "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS); - - for (; start_cpu <= end_cpu; start_cpu++) { - /* check for duplicates */ - for (i = 0; i < nr_cpus; i++) - if (tmp_cpus[i] == (int)start_cpu) - goto invalid; - - if (nr_cpus == max_entries) { - max_entries += MAX_NR_CPUS; - tmp = realloc(tmp_cpus, max_entries * sizeof(int)); - if (tmp == NULL) - goto invalid; - tmp_cpus = tmp; - } - tmp_cpus[nr_cpus++] = (int)start_cpu; - } - if (*p) - ++p; - - cpu_list = p; - } - - if (nr_cpus > 0) - cpus = cpu_map__trim_new(nr_cpus, tmp_cpus); - else if (*cpu_list != '\0') - cpus = cpu_map__default_new(); - else - cpus = perf_cpu_map__dummy_new(); -invalid: - free(tmp_cpus); -out: - return cpus; -} - -int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) -{ - if (idx < cpus->nr) - return cpus->map[idx]; - - return -1; -} - -int perf_cpu_map__nr(const struct perf_cpu_map *cpus) -{ - return cpus ? cpus->nr : 1; -} - -bool perf_cpu_map__empty(const struct perf_cpu_map *map) -{ - return map ? map->map[0] == -1 : true; -} - -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) -{ - int i; - - for (i = 0; i < cpus->nr; ++i) { - if (cpus->map[i] == cpu) - return i; - } - - return -1; -} - -int perf_cpu_map__max(struct perf_cpu_map *map) -{ - int i, max = -1; - - for (i = 0; i < map->nr; i++) { - if (map->map[i] > max) - max = map->map[i]; - } - - return max; -} - -/* - * Merge two cpumaps - * - * orig either gets freed and replaced with a new map, or reused - * with no reference count change (similar to "realloc") - * other has its reference count increased. - */ - -struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other) -{ - int *tmp_cpus; - int tmp_len; - int i, j, k; - struct perf_cpu_map *merged; - - if (!orig && !other) - return NULL; - if (!orig) { - perf_cpu_map__get(other); - return other; - } - if (!other) - return orig; - if (orig->nr == other->nr && - !memcmp(orig->map, other->map, orig->nr * sizeof(int))) - return orig; - - tmp_len = orig->nr + other->nr; - tmp_cpus = malloc(tmp_len * sizeof(int)); - if (!tmp_cpus) - return NULL; - - /* Standard merge algorithm from wikipedia */ - i = j = k = 0; - while (i < orig->nr && j < other->nr) { - if (orig->map[i] <= other->map[j]) { - if (orig->map[i] == other->map[j]) - j++; - tmp_cpus[k++] = orig->map[i++]; - } else - tmp_cpus[k++] = other->map[j++]; - } - - while (i < orig->nr) - tmp_cpus[k++] = orig->map[i++]; - - while (j < other->nr) - tmp_cpus[k++] = other->map[j++]; - assert(k <= tmp_len); - - merged = cpu_map__trim_new(k, tmp_cpus); - free(tmp_cpus); - perf_cpu_map__put(orig); - return merged; -} diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c deleted file mode 100644 index ae9e65aa2491..000000000000 --- a/tools/perf/lib/evlist.c +++ /dev/null @@ -1,641 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <perf/evlist.h> -#include <perf/evsel.h> -#include <linux/bitops.h> -#include <linux/list.h> -#include <linux/hash.h> -#include <sys/ioctl.h> -#include <internal/evlist.h> -#include <internal/evsel.h> -#include <internal/xyarray.h> -#include <internal/mmap.h> -#include <internal/cpumap.h> -#include <internal/threadmap.h> -#include <internal/xyarray.h> -#include <internal/lib.h> -#include <linux/zalloc.h> -#include <sys/ioctl.h> -#include <stdlib.h> -#include <errno.h> -#include <unistd.h> -#include <fcntl.h> -#include <signal.h> -#include <poll.h> -#include <sys/mman.h> -#include <perf/cpumap.h> -#include <perf/threadmap.h> -#include <api/fd/array.h> - -void perf_evlist__init(struct perf_evlist *evlist) -{ - int i; - - for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) - INIT_HLIST_HEAD(&evlist->heads[i]); - INIT_LIST_HEAD(&evlist->entries); - evlist->nr_entries = 0; - fdarray__init(&evlist->pollfd, 64); -} - -static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, - struct perf_evsel *evsel) -{ - /* - * We already have cpus for evsel (via PMU sysfs) so - * keep it, if there's no target cpu list defined. - */ - if (!evsel->own_cpus || evlist->has_user_cpus) { - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evlist->cpus); - } else if (evsel->cpus != evsel->own_cpus) { - perf_cpu_map__put(evsel->cpus); - evsel->cpus = perf_cpu_map__get(evsel->own_cpus); - } - - perf_thread_map__put(evsel->threads); - evsel->threads = perf_thread_map__get(evlist->threads); - evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus); -} - -static void perf_evlist__propagate_maps(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - perf_evlist__for_each_evsel(evlist, evsel) - __perf_evlist__propagate_maps(evlist, evsel); -} - -void perf_evlist__add(struct perf_evlist *evlist, - struct perf_evsel *evsel) -{ - list_add_tail(&evsel->node, &evlist->entries); - evlist->nr_entries += 1; - __perf_evlist__propagate_maps(evlist, evsel); -} - -void perf_evlist__remove(struct perf_evlist *evlist, - struct perf_evsel *evsel) -{ - list_del_init(&evsel->node); - evlist->nr_entries -= 1; -} - -struct perf_evlist *perf_evlist__new(void) -{ - struct perf_evlist *evlist = zalloc(sizeof(*evlist)); - - if (evlist != NULL) - perf_evlist__init(evlist); - - return evlist; -} - -struct perf_evsel * -perf_evlist__next(struct perf_evlist *evlist, struct perf_evsel *prev) -{ - struct perf_evsel *next; - - if (!prev) { - next = list_first_entry(&evlist->entries, - struct perf_evsel, - node); - } else { - next = list_next_entry(prev, node); - } - - /* Empty list is noticed here so don't need checking on entry. */ - if (&next->node == &evlist->entries) - return NULL; - - return next; -} - -static void perf_evlist__purge(struct perf_evlist *evlist) -{ - struct perf_evsel *pos, *n; - - perf_evlist__for_each_entry_safe(evlist, n, pos) { - list_del_init(&pos->node); - perf_evsel__delete(pos); - } - - evlist->nr_entries = 0; -} - -void perf_evlist__exit(struct perf_evlist *evlist) -{ - perf_cpu_map__put(evlist->cpus); - perf_thread_map__put(evlist->threads); - evlist->cpus = NULL; - evlist->threads = NULL; - fdarray__exit(&evlist->pollfd); -} - -void perf_evlist__delete(struct perf_evlist *evlist) -{ - if (evlist == NULL) - return; - - perf_evlist__munmap(evlist); - perf_evlist__close(evlist); - perf_evlist__purge(evlist); - perf_evlist__exit(evlist); - free(evlist); -} - -void perf_evlist__set_maps(struct perf_evlist *evlist, - struct perf_cpu_map *cpus, - struct perf_thread_map *threads) -{ - /* - * Allow for the possibility that one or another of the maps isn't being - * changed i.e. don't put it. Note we are assuming the maps that are - * being applied are brand new and evlist is taking ownership of the - * original reference count of 1. If that is not the case it is up to - * the caller to increase the reference count. - */ - if (cpus != evlist->cpus) { - perf_cpu_map__put(evlist->cpus); - evlist->cpus = perf_cpu_map__get(cpus); - } - - if (threads != evlist->threads) { - perf_thread_map__put(evlist->threads); - evlist->threads = perf_thread_map__get(threads); - } - - perf_evlist__propagate_maps(evlist); -} - -int perf_evlist__open(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - int err; - - perf_evlist__for_each_entry(evlist, evsel) { - err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); - if (err < 0) - goto out_err; - } - - return 0; - -out_err: - perf_evlist__close(evlist); - return err; -} - -void perf_evlist__close(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - perf_evlist__for_each_entry_reverse(evlist, evsel) - perf_evsel__close(evsel); -} - -void perf_evlist__enable(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - perf_evlist__for_each_entry(evlist, evsel) - perf_evsel__enable(evsel); -} - -void perf_evlist__disable(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - perf_evlist__for_each_entry(evlist, evsel) - perf_evsel__disable(evsel); -} - -u64 perf_evlist__read_format(struct perf_evlist *evlist) -{ - struct perf_evsel *first = perf_evlist__first(evlist); - - return first->attr.read_format; -} - -#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) - -static void perf_evlist__id_hash(struct perf_evlist *evlist, - struct perf_evsel *evsel, - int cpu, int thread, u64 id) -{ - int hash; - struct perf_sample_id *sid = SID(evsel, cpu, thread); - - sid->id = id; - sid->evsel = evsel; - hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); - hlist_add_head(&sid->node, &evlist->heads[hash]); -} - -void perf_evlist__id_add(struct perf_evlist *evlist, - struct perf_evsel *evsel, - int cpu, int thread, u64 id) -{ - perf_evlist__id_hash(evlist, evsel, cpu, thread, id); - evsel->id[evsel->ids++] = id; -} - -int perf_evlist__id_add_fd(struct perf_evlist *evlist, - struct perf_evsel *evsel, - int cpu, int thread, int fd) -{ - u64 read_data[4] = { 0, }; - int id_idx = 1; /* The first entry is the counter value */ - u64 id; - int ret; - - ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); - if (!ret) - goto add; - - if (errno != ENOTTY) - return -1; - - /* Legacy way to get event id.. All hail to old kernels! */ - - /* - * This way does not work with group format read, so bail - * out in that case. - */ - if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) - return -1; - - if (!(evsel->attr.read_format & PERF_FORMAT_ID) || - read(fd, &read_data, sizeof(read_data)) == -1) - return -1; - - if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - ++id_idx; - if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - ++id_idx; - - id = read_data[id_idx]; - -add: - perf_evlist__id_add(evlist, evsel, cpu, thread, id); - return 0; -} - -int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) -{ - int nr_cpus = perf_cpu_map__nr(evlist->cpus); - int nr_threads = perf_thread_map__nr(evlist->threads); - int nfds = 0; - struct perf_evsel *evsel; - - perf_evlist__for_each_entry(evlist, evsel) { - if (evsel->system_wide) - nfds += nr_cpus; - else - nfds += nr_cpus * nr_threads; - } - - if (fdarray__available_entries(&evlist->pollfd) < nfds && - fdarray__grow(&evlist->pollfd, nfds) < 0) - return -ENOMEM; - - return 0; -} - -int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, - void *ptr, short revent) -{ - int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); - - if (pos >= 0) { - evlist->pollfd.priv[pos].ptr = ptr; - fcntl(fd, F_SETFL, O_NONBLOCK); - } - - return pos; -} - -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, - void *arg __maybe_unused) -{ - struct perf_mmap *map = fda->priv[fd].ptr; - - if (map) - perf_mmap__put(map); -} - -int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) -{ - return fdarray__filter(&evlist->pollfd, revents_and_mask, - perf_evlist__munmap_filtered, NULL); -} - -int perf_evlist__poll(struct perf_evlist *evlist, int timeout) -{ - return fdarray__poll(&evlist->pollfd, timeout); -} - -static struct perf_mmap* perf_evlist__alloc_mmap(struct perf_evlist *evlist, bool overwrite) -{ - int i; - struct perf_mmap *map; - - map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); - if (!map) - return NULL; - - for (i = 0; i < evlist->nr_mmaps; i++) { - struct perf_mmap *prev = i ? &map[i - 1] : NULL; - - /* - * When the perf_mmap() call is made we grab one refcount, plus - * one extra to let perf_mmap__consume() get the last - * events after all real references (perf_mmap__get()) are - * dropped. - * - * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and - * thus does perf_mmap__get() on it. - */ - perf_mmap__init(&map[i], prev, overwrite, NULL); - } - - return map; -} - -static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, - struct perf_evsel *evsel, int idx, int cpu, - int thread) -{ - struct perf_sample_id *sid = SID(evsel, cpu, thread); - - sid->idx = idx; - if (evlist->cpus && cpu >= 0) - sid->cpu = evlist->cpus->map[cpu]; - else - sid->cpu = -1; - if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = perf_thread_map__pid(evlist->threads, thread); - else - sid->tid = -1; -} - -static struct perf_mmap* -perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx) -{ - struct perf_mmap *maps; - - maps = overwrite ? evlist->mmap_ovw : evlist->mmap; - - if (!maps) { - maps = perf_evlist__alloc_mmap(evlist, overwrite); - if (!maps) - return NULL; - - if (overwrite) - evlist->mmap_ovw = maps; - else - evlist->mmap = maps; - } - - return &maps[idx]; -} - -#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) - -static int -perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int output, int cpu) -{ - return perf_mmap__mmap(map, mp, output, cpu); -} - -static void perf_evlist__set_mmap_first(struct perf_evlist *evlist, struct perf_mmap *map, - bool overwrite) -{ - if (overwrite) - evlist->mmap_ovw_first = map; - else - evlist->mmap_first = map; -} - -static int -mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, - int idx, struct perf_mmap_param *mp, int cpu_idx, - int thread, int *_output, int *_output_overwrite) -{ - int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); - struct perf_evsel *evsel; - int revent; - - perf_evlist__for_each_entry(evlist, evsel) { - bool overwrite = evsel->attr.write_backward; - struct perf_mmap *map; - int *output, fd, cpu; - - if (evsel->system_wide && thread) - continue; - - cpu = perf_cpu_map__idx(evsel->cpus, evlist_cpu); - if (cpu == -1) - continue; - - map = ops->get(evlist, overwrite, idx); - if (map == NULL) - return -ENOMEM; - - if (overwrite) { - mp->prot = PROT_READ; - output = _output_overwrite; - } else { - mp->prot = PROT_READ | PROT_WRITE; - output = _output; - } - - fd = FD(evsel, cpu, thread); - - if (*output == -1) { - *output = fd; - - /* - * The last one will be done at perf_mmap__consume(), so that we - * make sure we don't prevent tools from consuming every last event in - * the ring buffer. - * - * I.e. we can get the POLLHUP meaning that the fd doesn't exist - * anymore, but the last events for it are still in the ring buffer, - * waiting to be consumed. - * - * Tools can chose to ignore this at their own discretion, but the - * evlist layer can't just drop it when filtering events in - * perf_evlist__filter_pollfd(). - */ - refcount_set(&map->refcnt, 2); - - if (ops->mmap(map, mp, *output, evlist_cpu) < 0) - return -1; - - if (!idx) - perf_evlist__set_mmap_first(evlist, map, overwrite); - } else { - if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) - return -1; - - perf_mmap__get(map); - } - - revent = !overwrite ? POLLIN : 0; - - if (!evsel->system_wide && - perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) { - perf_mmap__put(map); - return -1; - } - - if (evsel->attr.read_format & PERF_FORMAT_ID) { - if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, - fd) < 0) - return -1; - perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, - thread); - } - } - - return 0; -} - -static int -mmap_per_thread(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp) -{ - int thread; - int nr_threads = perf_thread_map__nr(evlist->threads); - - for (thread = 0; thread < nr_threads; thread++) { - int output = -1; - int output_overwrite = -1; - - if (ops->idx) - ops->idx(evlist, mp, thread, false); - - if (mmap_per_evsel(evlist, ops, thread, mp, 0, thread, - &output, &output_overwrite)) - goto out_unmap; - } - - return 0; - -out_unmap: - perf_evlist__munmap(evlist); - return -1; -} - -static int -mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp) -{ - int nr_threads = perf_thread_map__nr(evlist->threads); - int nr_cpus = perf_cpu_map__nr(evlist->cpus); - int cpu, thread; - - for (cpu = 0; cpu < nr_cpus; cpu++) { - int output = -1; - int output_overwrite = -1; - - if (ops->idx) - ops->idx(evlist, mp, cpu, true); - - for (thread = 0; thread < nr_threads; thread++) { - if (mmap_per_evsel(evlist, ops, cpu, mp, cpu, - thread, &output, &output_overwrite)) - goto out_unmap; - } - } - - return 0; - -out_unmap: - perf_evlist__munmap(evlist); - return -1; -} - -static int perf_evlist__nr_mmaps(struct perf_evlist *evlist) -{ - int nr_mmaps; - - nr_mmaps = perf_cpu_map__nr(evlist->cpus); - if (perf_cpu_map__empty(evlist->cpus)) - nr_mmaps = perf_thread_map__nr(evlist->threads); - - return nr_mmaps; -} - -int perf_evlist__mmap_ops(struct perf_evlist *evlist, - struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp) -{ - struct perf_evsel *evsel; - const struct perf_cpu_map *cpus = evlist->cpus; - const struct perf_thread_map *threads = evlist->threads; - - if (!ops || !ops->get || !ops->mmap) - return -EINVAL; - - mp->mask = evlist->mmap_len - page_size - 1; - - evlist->nr_mmaps = perf_evlist__nr_mmaps(evlist); - - perf_evlist__for_each_entry(evlist, evsel) { - if ((evsel->attr.read_format & PERF_FORMAT_ID) && - evsel->sample_id == NULL && - perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) - return -ENOMEM; - } - - if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) - return -ENOMEM; - - if (perf_cpu_map__empty(cpus)) - return mmap_per_thread(evlist, ops, mp); - - return mmap_per_cpu(evlist, ops, mp); -} - -int perf_evlist__mmap(struct perf_evlist *evlist, int pages) -{ - struct perf_mmap_param mp; - struct perf_evlist_mmap_ops ops = { - .get = perf_evlist__mmap_cb_get, - .mmap = perf_evlist__mmap_cb_mmap, - }; - - evlist->mmap_len = (pages + 1) * page_size; - - return perf_evlist__mmap_ops(evlist, &ops, &mp); -} - -void perf_evlist__munmap(struct perf_evlist *evlist) -{ - int i; - - if (evlist->mmap) { - for (i = 0; i < evlist->nr_mmaps; i++) - perf_mmap__munmap(&evlist->mmap[i]); - } - - if (evlist->mmap_ovw) { - for (i = 0; i < evlist->nr_mmaps; i++) - perf_mmap__munmap(&evlist->mmap_ovw[i]); - } - - zfree(&evlist->mmap); - zfree(&evlist->mmap_ovw); -} - -struct perf_mmap* -perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map, - bool overwrite) -{ - if (map) - return map->next; - - return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first; -} diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c deleted file mode 100644 index 4dc06289f4c7..000000000000 --- a/tools/perf/lib/evsel.c +++ /dev/null @@ -1,301 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <errno.h> -#include <unistd.h> -#include <sys/syscall.h> -#include <perf/evsel.h> -#include <perf/cpumap.h> -#include <perf/threadmap.h> -#include <linux/list.h> -#include <internal/evsel.h> -#include <linux/zalloc.h> -#include <stdlib.h> -#include <internal/xyarray.h> -#include <internal/cpumap.h> -#include <internal/threadmap.h> -#include <internal/lib.h> -#include <linux/string.h> -#include <sys/ioctl.h> - -void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr) -{ - INIT_LIST_HEAD(&evsel->node); - evsel->attr = *attr; -} - -struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr) -{ - struct perf_evsel *evsel = zalloc(sizeof(*evsel)); - - if (evsel != NULL) - perf_evsel__init(evsel, attr); - - return evsel; -} - -void perf_evsel__delete(struct perf_evsel *evsel) -{ - free(evsel); -} - -#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) - -int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) -{ - evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); - - if (evsel->fd) { - int cpu, thread; - for (cpu = 0; cpu < ncpus; cpu++) { - for (thread = 0; thread < nthreads; thread++) { - FD(evsel, cpu, thread) = -1; - } - } - } - - return evsel->fd != NULL ? 0 : -ENOMEM; -} - -static int -sys_perf_event_open(struct perf_event_attr *attr, - pid_t pid, int cpu, int group_fd, - unsigned long flags) -{ - return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); -} - -int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, - struct perf_thread_map *threads) -{ - int cpu, thread, err = 0; - - if (cpus == NULL) { - static struct perf_cpu_map *empty_cpu_map; - - if (empty_cpu_map == NULL) { - empty_cpu_map = perf_cpu_map__dummy_new(); - if (empty_cpu_map == NULL) - return -ENOMEM; - } - - cpus = empty_cpu_map; - } - - if (threads == NULL) { - static struct perf_thread_map *empty_thread_map; - - if (empty_thread_map == NULL) { - empty_thread_map = perf_thread_map__new_dummy(); - if (empty_thread_map == NULL) - return -ENOMEM; - } - - threads = empty_thread_map; - } - - if (evsel->fd == NULL && - perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) - return -ENOMEM; - - for (cpu = 0; cpu < cpus->nr; cpu++) { - for (thread = 0; thread < threads->nr; thread++) { - int fd; - - fd = sys_perf_event_open(&evsel->attr, - threads->map[thread].pid, - cpus->map[cpu], -1, 0); - - if (fd < 0) - return -errno; - - FD(evsel, cpu, thread) = fd; - } - } - - return err; -} - -static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) -{ - int thread; - - for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; - } -} - -void perf_evsel__close_fd(struct perf_evsel *evsel) -{ - int cpu; - - for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) - perf_evsel__close_fd_cpu(evsel, cpu); -} - -void perf_evsel__free_fd(struct perf_evsel *evsel) -{ - xyarray__delete(evsel->fd); - evsel->fd = NULL; -} - -void perf_evsel__close(struct perf_evsel *evsel) -{ - if (evsel->fd == NULL) - return; - - perf_evsel__close_fd(evsel); - perf_evsel__free_fd(evsel); -} - -void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) -{ - if (evsel->fd == NULL) - return; - - perf_evsel__close_fd_cpu(evsel, cpu); -} - -int perf_evsel__read_size(struct perf_evsel *evsel) -{ - u64 read_format = evsel->attr.read_format; - int entry = sizeof(u64); /* value */ - int size = 0; - int nr = 1; - - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - size += sizeof(u64); - - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - size += sizeof(u64); - - if (read_format & PERF_FORMAT_ID) - entry += sizeof(u64); - - if (read_format & PERF_FORMAT_GROUP) { - nr = evsel->nr_members; - size += sizeof(u64); - } - - size += entry * nr; - return size; -} - -int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, - struct perf_counts_values *count) -{ - size_t size = perf_evsel__read_size(evsel); - - memset(count, 0, sizeof(*count)); - - if (FD(evsel, cpu, thread) < 0) - return -EINVAL; - - if (readn(FD(evsel, cpu, thread), count->values, size) <= 0) - return -errno; - - return 0; -} - -static int perf_evsel__run_ioctl(struct perf_evsel *evsel, - int ioc, void *arg, - int cpu) -{ - int thread; - - for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread), - err = ioctl(fd, ioc, arg); - - if (err) - return err; - } - - return 0; -} - -int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu) -{ - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu); -} - -int perf_evsel__enable(struct perf_evsel *evsel) -{ - int i; - int err = 0; - - for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++) - err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, i); - return err; -} - -int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu) -{ - return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu); -} - -int perf_evsel__disable(struct perf_evsel *evsel) -{ - int i; - int err = 0; - - for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++) - err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, i); - return err; -} - -int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter) -{ - int err = 0, i; - - for (i = 0; i < evsel->cpus->nr && !err; i++) - err = perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_SET_FILTER, - (void *)filter, i); - return err; -} - -struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) -{ - return evsel->cpus; -} - -struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel) -{ - return evsel->threads; -} - -struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel) -{ - return &evsel->attr; -} - -int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) -{ - if (ncpus == 0 || nthreads == 0) - return 0; - - if (evsel->system_wide) - nthreads = 1; - - evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); - if (evsel->sample_id == NULL) - return -ENOMEM; - - evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); - if (evsel->id == NULL) { - xyarray__delete(evsel->sample_id); - evsel->sample_id = NULL; - return -ENOMEM; - } - - return 0; -} - -void perf_evsel__free_id(struct perf_evsel *evsel) -{ - xyarray__delete(evsel->sample_id); - evsel->sample_id = NULL; - zfree(&evsel->id); - evsel->ids = 0; -} diff --git a/tools/perf/lib/include/internal/cpumap.h b/tools/perf/lib/include/internal/cpumap.h deleted file mode 100644 index 840d4032587b..000000000000 --- a/tools/perf/lib/include/internal/cpumap.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_CPUMAP_H -#define __LIBPERF_INTERNAL_CPUMAP_H - -#include <linux/refcount.h> - -struct perf_cpu_map { - refcount_t refcnt; - int nr; - int map[]; -}; - -#ifndef MAX_NR_CPUS -#define MAX_NR_CPUS 2048 -#endif - -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); - -#endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h deleted file mode 100644 index 74dc8c3f0b66..000000000000 --- a/tools/perf/lib/include/internal/evlist.h +++ /dev/null @@ -1,127 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_EVLIST_H -#define __LIBPERF_INTERNAL_EVLIST_H - -#include <linux/list.h> -#include <api/fd/array.h> -#include <internal/evsel.h> - -#define PERF_EVLIST__HLIST_BITS 8 -#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS) - -struct perf_cpu_map; -struct perf_thread_map; -struct perf_mmap_param; - -struct perf_evlist { - struct list_head entries; - int nr_entries; - bool has_user_cpus; - struct perf_cpu_map *cpus; - struct perf_cpu_map *all_cpus; - struct perf_thread_map *threads; - int nr_mmaps; - size_t mmap_len; - struct fdarray pollfd; - struct hlist_head heads[PERF_EVLIST__HLIST_SIZE]; - struct perf_mmap *mmap; - struct perf_mmap *mmap_ovw; - struct perf_mmap *mmap_first; - struct perf_mmap *mmap_ovw_first; -}; - -typedef void -(*perf_evlist_mmap__cb_idx_t)(struct perf_evlist*, struct perf_mmap_param*, int, bool); -typedef struct perf_mmap* -(*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int); -typedef int -(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); - -struct perf_evlist_mmap_ops { - perf_evlist_mmap__cb_idx_t idx; - perf_evlist_mmap__cb_get_t get; - perf_evlist_mmap__cb_mmap_t mmap; -}; - -int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); -int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, - void *ptr, short revent); - -int perf_evlist__mmap_ops(struct perf_evlist *evlist, - struct perf_evlist_mmap_ops *ops, - struct perf_mmap_param *mp); - -void perf_evlist__init(struct perf_evlist *evlist); -void perf_evlist__exit(struct perf_evlist *evlist); - -/** - * __perf_evlist__for_each_entry - iterate thru all the evsels - * @list: list_head instance to iterate - * @evsel: struct perf_evsel iterator - */ -#define __perf_evlist__for_each_entry(list, evsel) \ - list_for_each_entry(evsel, list, node) - -/** - * evlist__for_each_entry - iterate thru all the evsels - * @evlist: perf_evlist instance to iterate - * @evsel: struct perf_evsel iterator - */ -#define perf_evlist__for_each_entry(evlist, evsel) \ - __perf_evlist__for_each_entry(&(evlist)->entries, evsel) - -/** - * __perf_evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order - * @list: list_head instance to iterate - * @evsel: struct evsel iterator - */ -#define __perf_evlist__for_each_entry_reverse(list, evsel) \ - list_for_each_entry_reverse(evsel, list, node) - -/** - * perf_evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order - * @evlist: evlist instance to iterate - * @evsel: struct evsel iterator - */ -#define perf_evlist__for_each_entry_reverse(evlist, evsel) \ - __perf_evlist__for_each_entry_reverse(&(evlist)->entries, evsel) - -/** - * __perf_evlist__for_each_entry_safe - safely iterate thru all the evsels - * @list: list_head instance to iterate - * @tmp: struct evsel temp iterator - * @evsel: struct evsel iterator - */ -#define __perf_evlist__for_each_entry_safe(list, tmp, evsel) \ - list_for_each_entry_safe(evsel, tmp, list, node) - -/** - * perf_evlist__for_each_entry_safe - safely iterate thru all the evsels - * @evlist: evlist instance to iterate - * @evsel: struct evsel iterator - * @tmp: struct evsel temp iterator - */ -#define perf_evlist__for_each_entry_safe(evlist, tmp, evsel) \ - __perf_evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel) - -static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) -{ - return list_entry(evlist->entries.next, struct perf_evsel, node); -} - -static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) -{ - return list_entry(evlist->entries.prev, struct perf_evsel, node); -} - -u64 perf_evlist__read_format(struct perf_evlist *evlist); - -void perf_evlist__id_add(struct perf_evlist *evlist, - struct perf_evsel *evsel, - int cpu, int thread, u64 id); - -int perf_evlist__id_add_fd(struct perf_evlist *evlist, - struct perf_evsel *evsel, - int cpu, int thread, int fd); - -#endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/perf/lib/include/internal/evsel.h b/tools/perf/lib/include/internal/evsel.h deleted file mode 100644 index 1ffd083b235e..000000000000 --- a/tools/perf/lib/include/internal/evsel.h +++ /dev/null @@ -1,63 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_EVSEL_H -#define __LIBPERF_INTERNAL_EVSEL_H - -#include <linux/types.h> -#include <linux/perf_event.h> -#include <stdbool.h> -#include <sys/types.h> - -struct perf_cpu_map; -struct perf_thread_map; -struct xyarray; - -/* - * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are - * more than one entry in the evlist. - */ -struct perf_sample_id { - struct hlist_node node; - u64 id; - struct perf_evsel *evsel; - /* - * 'idx' will be used for AUX area sampling. A sample will have AUX area - * data that will be queued for decoding, where there are separate - * queues for each CPU (per-cpu tracing) or task (per-thread tracing). - * The sample ID can be used to lookup 'idx' which is effectively the - * queue number. - */ - int idx; - int cpu; - pid_t tid; - - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; -}; - -struct perf_evsel { - struct list_head node; - struct perf_event_attr attr; - struct perf_cpu_map *cpus; - struct perf_cpu_map *own_cpus; - struct perf_thread_map *threads; - struct xyarray *fd; - struct xyarray *sample_id; - u64 *id; - u32 ids; - - /* parse modifier helper */ - int nr_members; - bool system_wide; -}; - -void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr); -int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads); -void perf_evsel__close_fd(struct perf_evsel *evsel); -void perf_evsel__free_fd(struct perf_evsel *evsel); -int perf_evsel__read_size(struct perf_evsel *evsel); -int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); - -int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); -void perf_evsel__free_id(struct perf_evsel *evsel); - -#endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/perf/lib/include/internal/lib.h b/tools/perf/lib/include/internal/lib.h deleted file mode 100644 index 5175d491b2d4..000000000000 --- a/tools/perf/lib/include/internal/lib.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_LIB_H -#define __LIBPERF_INTERNAL_LIB_H - -#include <sys/types.h> - -extern unsigned int page_size; - -ssize_t readn(int fd, void *buf, size_t n); -ssize_t writen(int fd, const void *buf, size_t n); - -#endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/perf/lib/include/internal/mmap.h b/tools/perf/lib/include/internal/mmap.h deleted file mode 100644 index be7556e0a2b2..000000000000 --- a/tools/perf/lib/include/internal/mmap.h +++ /dev/null @@ -1,55 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_MMAP_H -#define __LIBPERF_INTERNAL_MMAP_H - -#include <linux/compiler.h> -#include <linux/refcount.h> -#include <linux/types.h> -#include <stdbool.h> - -/* perf sample has 16 bits size limit */ -#define PERF_SAMPLE_MAX_SIZE (1 << 16) - -struct perf_mmap; - -typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map); - -/** - * struct perf_mmap - perf's ring buffer mmap details - * - * @refcnt - e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this - */ -struct perf_mmap { - void *base; - int mask; - int fd; - int cpu; - refcount_t refcnt; - u64 prev; - u64 start; - u64 end; - bool overwrite; - u64 flush; - libperf_unmap_cb_t unmap_cb; - char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); - struct perf_mmap *next; -}; - -struct perf_mmap_param { - int prot; - int mask; -}; - -size_t perf_mmap__mmap_len(struct perf_mmap *map); - -void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, - bool overwrite, libperf_unmap_cb_t unmap_cb); -int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu); -void perf_mmap__munmap(struct perf_mmap *map); -void perf_mmap__get(struct perf_mmap *map); -void perf_mmap__put(struct perf_mmap *map); - -u64 perf_mmap__read_head(struct perf_mmap *map); - -#endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/perf/lib/include/internal/tests.h b/tools/perf/lib/include/internal/tests.h deleted file mode 100644 index 2093e8868a67..000000000000 --- a/tools/perf/lib/include/internal/tests.h +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_TESTS_H -#define __LIBPERF_INTERNAL_TESTS_H - -#include <stdio.h> - -int tests_failed; - -#define __T_START \ -do { \ - fprintf(stdout, "- running %s...", __FILE__); \ - fflush(NULL); \ - tests_failed = 0; \ -} while (0) - -#define __T_END \ -do { \ - if (tests_failed) \ - fprintf(stdout, " FAILED (%d)\n", tests_failed); \ - else \ - fprintf(stdout, "OK\n"); \ -} while (0) - -#define __T(text, cond) \ -do { \ - if (!(cond)) { \ - fprintf(stderr, "FAILED %s:%d %s\n", __FILE__, __LINE__, text); \ - tests_failed++; \ - return -1; \ - } \ -} while (0) - -#endif /* __LIBPERF_INTERNAL_TESTS_H */ diff --git a/tools/perf/lib/include/internal/threadmap.h b/tools/perf/lib/include/internal/threadmap.h deleted file mode 100644 index df748baf9eda..000000000000 --- a/tools/perf/lib/include/internal/threadmap.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_THREADMAP_H -#define __LIBPERF_INTERNAL_THREADMAP_H - -#include <linux/refcount.h> -#include <sys/types.h> -#include <unistd.h> - -struct thread_map_data { - pid_t pid; - char *comm; -}; - -struct perf_thread_map { - refcount_t refcnt; - int nr; - int err_thread; - struct thread_map_data map[]; -}; - -struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, int nr); - -#endif /* __LIBPERF_INTERNAL_THREADMAP_H */ diff --git a/tools/perf/lib/include/internal/xyarray.h b/tools/perf/lib/include/internal/xyarray.h deleted file mode 100644 index 51e35d6c8ec4..000000000000 --- a/tools/perf/lib/include/internal/xyarray.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_XYARRAY_H -#define __LIBPERF_INTERNAL_XYARRAY_H - -#include <linux/compiler.h> -#include <sys/types.h> - -struct xyarray { - size_t row_size; - size_t entry_size; - size_t entries; - size_t max_x; - size_t max_y; - char contents[] __aligned(8); -}; - -struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size); -void xyarray__delete(struct xyarray *xy); -void xyarray__reset(struct xyarray *xy); - -static inline void *xyarray__entry(struct xyarray *xy, int x, int y) -{ - return &xy->contents[x * xy->row_size + y * xy->entry_size]; -} - -static inline int xyarray__max_y(struct xyarray *xy) -{ - return xy->max_y; -} - -static inline int xyarray__max_x(struct xyarray *xy) -{ - return xy->max_x; -} - -#endif /* __LIBPERF_INTERNAL_XYARRAY_H */ diff --git a/tools/perf/lib/include/perf/core.h b/tools/perf/lib/include/perf/core.h deleted file mode 100644 index a3f6d68edad7..000000000000 --- a/tools/perf/lib/include/perf/core.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_CORE_H -#define __LIBPERF_CORE_H - -#include <stdarg.h> - -#ifndef LIBPERF_API -#define LIBPERF_API __attribute__((visibility("default"))) -#endif - -enum libperf_print_level { - LIBPERF_ERR, - LIBPERF_WARN, - LIBPERF_INFO, - LIBPERF_DEBUG, - LIBPERF_DEBUG2, - LIBPERF_DEBUG3, -}; - -typedef int (*libperf_print_fn_t)(enum libperf_print_level level, - const char *, va_list ap); - -LIBPERF_API void libperf_init(libperf_print_fn_t fn); - -#endif /* __LIBPERF_CORE_H */ diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h deleted file mode 100644 index 6a17ad730cbc..000000000000 --- a/tools/perf/lib/include/perf/cpumap.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_CPUMAP_H -#define __LIBPERF_CPUMAP_H - -#include <perf/core.h> -#include <stdio.h> -#include <stdbool.h> - -struct perf_cpu_map; - -LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map); -LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig, - struct perf_cpu_map *other); -LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); -LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus); -LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); - -#define perf_cpu_map__for_each_cpu(cpu, idx, cpus) \ - for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx); \ - (idx) < perf_cpu_map__nr(cpus); \ - (idx)++, (cpu) = perf_cpu_map__cpu(cpus, idx)) - -#endif /* __LIBPERF_CPUMAP_H */ diff --git a/tools/perf/lib/include/perf/event.h b/tools/perf/lib/include/perf/event.h deleted file mode 100644 index 18106899cb4e..000000000000 --- a/tools/perf/lib/include/perf/event.h +++ /dev/null @@ -1,385 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_EVENT_H -#define __LIBPERF_EVENT_H - -#include <linux/perf_event.h> -#include <linux/types.h> -#include <linux/limits.h> -#include <linux/bpf.h> -#include <sys/types.h> /* pid_t */ - -struct perf_record_mmap { - struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; - char filename[PATH_MAX]; -}; - -struct perf_record_mmap2 { - struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; - __u32 maj; - __u32 min; - __u64 ino; - __u64 ino_generation; - __u32 prot; - __u32 flags; - char filename[PATH_MAX]; -}; - -struct perf_record_comm { - struct perf_event_header header; - __u32 pid, tid; - char comm[16]; -}; - -struct perf_record_namespaces { - struct perf_event_header header; - __u32 pid, tid; - __u64 nr_namespaces; - struct perf_ns_link_info link_info[]; -}; - -struct perf_record_fork { - struct perf_event_header header; - __u32 pid, ppid; - __u32 tid, ptid; - __u64 time; -}; - -struct perf_record_lost { - struct perf_event_header header; - __u64 id; - __u64 lost; -}; - -struct perf_record_lost_samples { - struct perf_event_header header; - __u64 lost; -}; - -/* - * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID - */ -struct perf_record_read { - struct perf_event_header header; - __u32 pid, tid; - __u64 value; - __u64 time_enabled; - __u64 time_running; - __u64 id; -}; - -struct perf_record_throttle { - struct perf_event_header header; - __u64 time; - __u64 id; - __u64 stream_id; -}; - -#ifndef KSYM_NAME_LEN -#define KSYM_NAME_LEN 256 -#endif - -struct perf_record_ksymbol { - struct perf_event_header header; - __u64 addr; - __u32 len; - __u16 ksym_type; - __u16 flags; - char name[KSYM_NAME_LEN]; -}; - -struct perf_record_bpf_event { - struct perf_event_header header; - __u16 type; - __u16 flags; - __u32 id; - - /* for bpf_prog types */ - __u8 tag[BPF_TAG_SIZE]; // prog tag -}; - -struct perf_record_sample { - struct perf_event_header header; - __u64 array[]; -}; - -struct perf_record_switch { - struct perf_event_header header; - __u32 next_prev_pid; - __u32 next_prev_tid; -}; - -struct perf_record_header_attr { - struct perf_event_header header; - struct perf_event_attr attr; - __u64 id[]; -}; - -enum { - PERF_CPU_MAP__CPUS = 0, - PERF_CPU_MAP__MASK = 1, -}; - -struct cpu_map_entries { - __u16 nr; - __u16 cpu[]; -}; - -struct perf_record_record_cpu_map { - __u16 nr; - __u16 long_size; - unsigned long mask[]; -}; - -struct perf_record_cpu_map_data { - __u16 type; - char data[]; -}; - -struct perf_record_cpu_map { - struct perf_event_header header; - struct perf_record_cpu_map_data data; -}; - -enum { - PERF_EVENT_UPDATE__UNIT = 0, - PERF_EVENT_UPDATE__SCALE = 1, - PERF_EVENT_UPDATE__NAME = 2, - PERF_EVENT_UPDATE__CPUS = 3, -}; - -struct perf_record_event_update_cpus { - struct perf_record_cpu_map_data cpus; -}; - -struct perf_record_event_update_scale { - double scale; -}; - -struct perf_record_event_update { - struct perf_event_header header; - __u64 type; - __u64 id; - char data[]; -}; - -#define MAX_EVENT_NAME 64 - -struct perf_trace_event_type { - __u64 event_id; - char name[MAX_EVENT_NAME]; -}; - -struct perf_record_header_event_type { - struct perf_event_header header; - struct perf_trace_event_type event_type; -}; - -struct perf_record_header_tracing_data { - struct perf_event_header header; - __u32 size; -}; - -struct perf_record_header_build_id { - struct perf_event_header header; - pid_t pid; - __u8 build_id[24]; - char filename[]; -}; - -struct id_index_entry { - __u64 id; - __u64 idx; - __u64 cpu; - __u64 tid; -}; - -struct perf_record_id_index { - struct perf_event_header header; - __u64 nr; - struct id_index_entry entries[0]; -}; - -struct perf_record_auxtrace_info { - struct perf_event_header header; - __u32 type; - __u32 reserved__; /* For alignment */ - __u64 priv[]; -}; - -struct perf_record_auxtrace { - struct perf_event_header header; - __u64 size; - __u64 offset; - __u64 reference; - __u32 idx; - __u32 tid; - __u32 cpu; - __u32 reserved__; /* For alignment */ -}; - -#define MAX_AUXTRACE_ERROR_MSG 64 - -struct perf_record_auxtrace_error { - struct perf_event_header header; - __u32 type; - __u32 code; - __u32 cpu; - __u32 pid; - __u32 tid; - __u32 fmt; - __u64 ip; - __u64 time; - char msg[MAX_AUXTRACE_ERROR_MSG]; -}; - -struct perf_record_aux { - struct perf_event_header header; - __u64 aux_offset; - __u64 aux_size; - __u64 flags; -}; - -struct perf_record_itrace_start { - struct perf_event_header header; - __u32 pid; - __u32 tid; -}; - -struct perf_record_thread_map_entry { - __u64 pid; - char comm[16]; -}; - -struct perf_record_thread_map { - struct perf_event_header header; - __u64 nr; - struct perf_record_thread_map_entry entries[]; -}; - -enum { - PERF_STAT_CONFIG_TERM__AGGR_MODE = 0, - PERF_STAT_CONFIG_TERM__INTERVAL = 1, - PERF_STAT_CONFIG_TERM__SCALE = 2, - PERF_STAT_CONFIG_TERM__MAX = 3, -}; - -struct perf_record_stat_config_entry { - __u64 tag; - __u64 val; -}; - -struct perf_record_stat_config { - struct perf_event_header header; - __u64 nr; - struct perf_record_stat_config_entry data[]; -}; - -struct perf_record_stat { - struct perf_event_header header; - - __u64 id; - __u32 cpu; - __u32 thread; - - union { - struct { - __u64 val; - __u64 ena; - __u64 run; - }; - __u64 values[3]; - }; -}; - -struct perf_record_stat_round { - struct perf_event_header header; - __u64 type; - __u64 time; -}; - -struct perf_record_time_conv { - struct perf_event_header header; - __u64 time_shift; - __u64 time_mult; - __u64 time_zero; -}; - -struct perf_record_header_feature { - struct perf_event_header header; - __u64 feat_id; - char data[]; -}; - -struct perf_record_compressed { - struct perf_event_header header; - char data[]; -}; - -enum perf_user_event_type { /* above any possible kernel type */ - PERF_RECORD_USER_TYPE_START = 64, - PERF_RECORD_HEADER_ATTR = 64, - PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */ - PERF_RECORD_HEADER_TRACING_DATA = 66, - PERF_RECORD_HEADER_BUILD_ID = 67, - PERF_RECORD_FINISHED_ROUND = 68, - PERF_RECORD_ID_INDEX = 69, - PERF_RECORD_AUXTRACE_INFO = 70, - PERF_RECORD_AUXTRACE = 71, - PERF_RECORD_AUXTRACE_ERROR = 72, - PERF_RECORD_THREAD_MAP = 73, - PERF_RECORD_CPU_MAP = 74, - PERF_RECORD_STAT_CONFIG = 75, - PERF_RECORD_STAT = 76, - PERF_RECORD_STAT_ROUND = 77, - PERF_RECORD_EVENT_UPDATE = 78, - PERF_RECORD_TIME_CONV = 79, - PERF_RECORD_HEADER_FEATURE = 80, - PERF_RECORD_COMPRESSED = 81, - PERF_RECORD_HEADER_MAX -}; - -union perf_event { - struct perf_event_header header; - struct perf_record_mmap mmap; - struct perf_record_mmap2 mmap2; - struct perf_record_comm comm; - struct perf_record_namespaces namespaces; - struct perf_record_fork fork; - struct perf_record_lost lost; - struct perf_record_lost_samples lost_samples; - struct perf_record_read read; - struct perf_record_throttle throttle; - struct perf_record_sample sample; - struct perf_record_bpf_event bpf; - struct perf_record_ksymbol ksymbol; - struct perf_record_header_attr attr; - struct perf_record_event_update event_update; - struct perf_record_header_event_type event_type; - struct perf_record_header_tracing_data tracing_data; - struct perf_record_header_build_id build_id; - struct perf_record_id_index id_index; - struct perf_record_auxtrace_info auxtrace_info; - struct perf_record_auxtrace auxtrace; - struct perf_record_auxtrace_error auxtrace_error; - struct perf_record_aux aux; - struct perf_record_itrace_start itrace_start; - struct perf_record_switch context_switch; - struct perf_record_thread_map thread_map; - struct perf_record_cpu_map cpu_map; - struct perf_record_stat_config stat_config; - struct perf_record_stat stat; - struct perf_record_stat_round stat_round; - struct perf_record_time_conv time_conv; - struct perf_record_header_feature feat; - struct perf_record_compressed pack; -}; - -#endif /* __LIBPERF_EVENT_H */ diff --git a/tools/perf/lib/include/perf/evlist.h b/tools/perf/lib/include/perf/evlist.h deleted file mode 100644 index 0a7479dc13bf..000000000000 --- a/tools/perf/lib/include/perf/evlist.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_EVLIST_H -#define __LIBPERF_EVLIST_H - -#include <perf/core.h> -#include <stdbool.h> - -struct perf_evlist; -struct perf_evsel; -struct perf_cpu_map; -struct perf_thread_map; - -LIBPERF_API void perf_evlist__add(struct perf_evlist *evlist, - struct perf_evsel *evsel); -LIBPERF_API void perf_evlist__remove(struct perf_evlist *evlist, - struct perf_evsel *evsel); -LIBPERF_API struct perf_evlist *perf_evlist__new(void); -LIBPERF_API void perf_evlist__delete(struct perf_evlist *evlist); -LIBPERF_API struct perf_evsel* perf_evlist__next(struct perf_evlist *evlist, - struct perf_evsel *evsel); -LIBPERF_API int perf_evlist__open(struct perf_evlist *evlist); -LIBPERF_API void perf_evlist__close(struct perf_evlist *evlist); -LIBPERF_API void perf_evlist__enable(struct perf_evlist *evlist); -LIBPERF_API void perf_evlist__disable(struct perf_evlist *evlist); - -#define perf_evlist__for_each_evsel(evlist, pos) \ - for ((pos) = perf_evlist__next((evlist), NULL); \ - (pos) != NULL; \ - (pos) = perf_evlist__next((evlist), (pos))) - -LIBPERF_API void perf_evlist__set_maps(struct perf_evlist *evlist, - struct perf_cpu_map *cpus, - struct perf_thread_map *threads); -LIBPERF_API int perf_evlist__poll(struct perf_evlist *evlist, int timeout); -LIBPERF_API int perf_evlist__filter_pollfd(struct perf_evlist *evlist, - short revents_and_mask); - -LIBPERF_API int perf_evlist__mmap(struct perf_evlist *evlist, int pages); -LIBPERF_API void perf_evlist__munmap(struct perf_evlist *evlist); - -LIBPERF_API struct perf_mmap *perf_evlist__next_mmap(struct perf_evlist *evlist, - struct perf_mmap *map, - bool overwrite); -#define perf_evlist__for_each_mmap(evlist, pos, overwrite) \ - for ((pos) = perf_evlist__next_mmap((evlist), NULL, overwrite); \ - (pos) != NULL; \ - (pos) = perf_evlist__next_mmap((evlist), (pos), overwrite)) - -#endif /* __LIBPERF_EVLIST_H */ diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h deleted file mode 100644 index c82ec39a4ad0..000000000000 --- a/tools/perf/lib/include/perf/evsel.h +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_EVSEL_H -#define __LIBPERF_EVSEL_H - -#include <stdint.h> -#include <perf/core.h> - -struct perf_evsel; -struct perf_event_attr; -struct perf_cpu_map; -struct perf_thread_map; - -struct perf_counts_values { - union { - struct { - uint64_t val; - uint64_t ena; - uint64_t run; - }; - uint64_t values[3]; - }; -}; - -LIBPERF_API struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr); -LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, - struct perf_thread_map *threads); -LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); -LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); -LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, - struct perf_counts_values *count); -LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); -LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); -LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel); -LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel); -LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); - -#endif /* __LIBPERF_EVSEL_H */ diff --git a/tools/perf/lib/include/perf/mmap.h b/tools/perf/lib/include/perf/mmap.h deleted file mode 100644 index 9508ad90d8b9..000000000000 --- a/tools/perf/lib/include/perf/mmap.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_MMAP_H -#define __LIBPERF_MMAP_H - -#include <perf/core.h> - -struct perf_mmap; -union perf_event; - -LIBPERF_API void perf_mmap__consume(struct perf_mmap *map); -LIBPERF_API int perf_mmap__read_init(struct perf_mmap *map); -LIBPERF_API void perf_mmap__read_done(struct perf_mmap *map); -LIBPERF_API union perf_event *perf_mmap__read_event(struct perf_mmap *map); - -#endif /* __LIBPERF_MMAP_H */ diff --git a/tools/perf/lib/include/perf/threadmap.h b/tools/perf/lib/include/perf/threadmap.h deleted file mode 100644 index a7c50de8d010..000000000000 --- a/tools/perf/lib/include/perf/threadmap.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_THREADMAP_H -#define __LIBPERF_THREADMAP_H - -#include <perf/core.h> -#include <sys/types.h> - -struct perf_thread_map; - -LIBPERF_API struct perf_thread_map *perf_thread_map__new_dummy(void); - -LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid); -LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int thread); -LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads); -LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread); - -LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map); -LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map); - -#endif /* __LIBPERF_THREADMAP_H */ diff --git a/tools/perf/lib/internal.h b/tools/perf/lib/internal.h deleted file mode 100644 index 2c27e158de6b..000000000000 --- a/tools/perf/lib/internal.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LIBPERF_INTERNAL_H -#define __LIBPERF_INTERNAL_H - -#include <perf/core.h> - -void libperf_print(enum libperf_print_level level, - const char *format, ...) - __attribute__((format(printf, 2, 3))); - -#define __pr(level, fmt, ...) \ -do { \ - libperf_print(level, "libperf: " fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_err(fmt, ...) __pr(LIBPERF_ERR, fmt, ##__VA_ARGS__) -#define pr_warning(fmt, ...) __pr(LIBPERF_WARN, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(LIBPERF_INFO, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) __pr(LIBPERF_DEBUG, fmt, ##__VA_ARGS__) -#define pr_debug2(fmt, ...) __pr(LIBPERF_DEBUG2, fmt, ##__VA_ARGS__) -#define pr_debug3(fmt, ...) __pr(LIBPERF_DEBUG3, fmt, ##__VA_ARGS__) - -#endif /* __LIBPERF_INTERNAL_H */ diff --git a/tools/perf/lib/lib.c b/tools/perf/lib/lib.c deleted file mode 100644 index 18658931fc71..000000000000 --- a/tools/perf/lib/lib.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <unistd.h> -#include <stdbool.h> -#include <errno.h> -#include <linux/kernel.h> -#include <internal/lib.h> - -unsigned int page_size; - -static ssize_t ion(bool is_read, int fd, void *buf, size_t n) -{ - void *buf_start = buf; - size_t left = n; - - while (left) { - /* buf must be treated as const if !is_read. */ - ssize_t ret = is_read ? read(fd, buf, left) : - write(fd, buf, left); - - if (ret < 0 && errno == EINTR) - continue; - if (ret <= 0) - return ret; - - left -= ret; - buf += ret; - } - - BUG_ON((size_t)(buf - buf_start) != n); - return n; -} - -/* - * Read exactly 'n' bytes or return an error. - */ -ssize_t readn(int fd, void *buf, size_t n) -{ - return ion(true, fd, buf, n); -} - -/* - * Write exactly 'n' bytes or return an error. - */ -ssize_t writen(int fd, const void *buf, size_t n) -{ - /* ion does not modify buf. */ - return ion(false, fd, (void *)buf, n); -} diff --git a/tools/perf/lib/libperf.map b/tools/perf/lib/libperf.map deleted file mode 100644 index 7be1af8a546c..000000000000 --- a/tools/perf/lib/libperf.map +++ /dev/null @@ -1,51 +0,0 @@ -LIBPERF_0.0.1 { - global: - libperf_init; - perf_cpu_map__dummy_new; - perf_cpu_map__get; - perf_cpu_map__put; - perf_cpu_map__new; - perf_cpu_map__read; - perf_cpu_map__nr; - perf_cpu_map__cpu; - perf_cpu_map__empty; - perf_cpu_map__max; - perf_thread_map__new_dummy; - perf_thread_map__set_pid; - perf_thread_map__comm; - perf_thread_map__nr; - perf_thread_map__pid; - perf_thread_map__get; - perf_thread_map__put; - perf_evsel__new; - perf_evsel__delete; - perf_evsel__enable; - perf_evsel__disable; - perf_evsel__open; - perf_evsel__close; - perf_evsel__read; - perf_evsel__cpus; - perf_evsel__threads; - perf_evsel__attr; - perf_evlist__new; - perf_evlist__delete; - perf_evlist__open; - perf_evlist__close; - perf_evlist__enable; - perf_evlist__disable; - perf_evlist__add; - perf_evlist__remove; - perf_evlist__next; - perf_evlist__set_maps; - perf_evlist__poll; - perf_evlist__mmap; - perf_evlist__munmap; - perf_evlist__filter_pollfd; - perf_evlist__next_mmap; - perf_mmap__consume; - perf_mmap__read_init; - perf_mmap__read_done; - perf_mmap__read_event; - local: - *; -}; diff --git a/tools/perf/lib/libperf.pc.template b/tools/perf/lib/libperf.pc.template deleted file mode 100644 index 117e4a237b55..000000000000 --- a/tools/perf/lib/libperf.pc.template +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -prefix=@PREFIX@ -libdir=@LIBDIR@ -includedir=${prefix}/include - -Name: libperf -Description: perf library -Version: @VERSION@ -Libs: -L${libdir} -lperf -Cflags: -I${includedir} diff --git a/tools/perf/lib/mmap.c b/tools/perf/lib/mmap.c deleted file mode 100644 index 79d5ed6c38cc..000000000000 --- a/tools/perf/lib/mmap.c +++ /dev/null @@ -1,275 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <sys/mman.h> -#include <inttypes.h> -#include <asm/bug.h> -#include <errno.h> -#include <string.h> -#include <linux/ring_buffer.h> -#include <linux/perf_event.h> -#include <perf/mmap.h> -#include <perf/event.h> -#include <internal/mmap.h> -#include <internal/lib.h> -#include <linux/kernel.h> -#include "internal.h" - -void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, - bool overwrite, libperf_unmap_cb_t unmap_cb) -{ - map->fd = -1; - map->overwrite = overwrite; - map->unmap_cb = unmap_cb; - refcount_set(&map->refcnt, 0); - if (prev) - prev->next = map; -} - -size_t perf_mmap__mmap_len(struct perf_mmap *map) -{ - return map->mask + 1 + page_size; -} - -int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, - int fd, int cpu) -{ - map->prev = 0; - map->mask = mp->mask; - map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, - MAP_SHARED, fd, 0); - if (map->base == MAP_FAILED) { - map->base = NULL; - return -1; - } - - map->fd = fd; - map->cpu = cpu; - return 0; -} - -void perf_mmap__munmap(struct perf_mmap *map) -{ - if (map && map->base != NULL) { - munmap(map->base, perf_mmap__mmap_len(map)); - map->base = NULL; - map->fd = -1; - refcount_set(&map->refcnt, 0); - } - if (map && map->unmap_cb) - map->unmap_cb(map); -} - -void perf_mmap__get(struct perf_mmap *map) -{ - refcount_inc(&map->refcnt); -} - -void perf_mmap__put(struct perf_mmap *map) -{ - BUG_ON(map->base && refcount_read(&map->refcnt) == 0); - - if (refcount_dec_and_test(&map->refcnt)) - perf_mmap__munmap(map); -} - -static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) -{ - ring_buffer_write_tail(md->base, tail); -} - -u64 perf_mmap__read_head(struct perf_mmap *map) -{ - return ring_buffer_read_head(map->base); -} - -static bool perf_mmap__empty(struct perf_mmap *map) -{ - struct perf_event_mmap_page *pc = map->base; - - return perf_mmap__read_head(map) == map->prev && !pc->aux_size; -} - -void perf_mmap__consume(struct perf_mmap *map) -{ - if (!map->overwrite) { - u64 old = map->prev; - - perf_mmap__write_tail(map, old); - } - - if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map)) - perf_mmap__put(map); -} - -static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end) -{ - struct perf_event_header *pheader; - u64 evt_head = *start; - int size = mask + 1; - - pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start); - pheader = (struct perf_event_header *)(buf + (*start & mask)); - while (true) { - if (evt_head - *start >= (unsigned int)size) { - pr_debug("Finished reading overwrite ring buffer: rewind\n"); - if (evt_head - *start > (unsigned int)size) - evt_head -= pheader->size; - *end = evt_head; - return 0; - } - - pheader = (struct perf_event_header *)(buf + (evt_head & mask)); - - if (pheader->size == 0) { - pr_debug("Finished reading overwrite ring buffer: get start\n"); - *end = evt_head; - return 0; - } - - evt_head += pheader->size; - pr_debug3("move evt_head: %"PRIx64"\n", evt_head); - } - WARN_ONCE(1, "Shouldn't get here\n"); - return -1; -} - -/* - * Report the start and end of the available data in ringbuffer - */ -static int __perf_mmap__read_init(struct perf_mmap *md) -{ - u64 head = perf_mmap__read_head(md); - u64 old = md->prev; - unsigned char *data = md->base + page_size; - unsigned long size; - - md->start = md->overwrite ? head : old; - md->end = md->overwrite ? old : head; - - if ((md->end - md->start) < md->flush) - return -EAGAIN; - - size = md->end - md->start; - if (size > (unsigned long)(md->mask) + 1) { - if (!md->overwrite) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - - md->prev = head; - perf_mmap__consume(md); - return -EAGAIN; - } - - /* - * Backward ring buffer is full. We still have a chance to read - * most of data from it. - */ - if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end)) - return -EINVAL; - } - - return 0; -} - -int perf_mmap__read_init(struct perf_mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->refcnt)) - return -ENOENT; - - return __perf_mmap__read_init(map); -} - -/* - * Mandatory for overwrite mode - * The direction of overwrite mode is backward. - * The last perf_mmap__read() will set tail to map->core.prev. - * Need to correct the map->core.prev to head which is the end of next read. - */ -void perf_mmap__read_done(struct perf_mmap *map) -{ - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->refcnt)) - return; - - map->prev = perf_mmap__read_head(map); -} - -/* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, - u64 *startp, u64 end) -{ - unsigned char *data = map->base + page_size; - union perf_event *event = NULL; - int diff = end - *startp; - - if (diff >= (int)sizeof(event->header)) { - size_t size; - - event = (union perf_event *)&data[*startp & map->mask]; - size = event->header.size; - - if (size < sizeof(event->header) || diff < (int)size) - return NULL; - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. - */ - if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) { - unsigned int offset = *startp; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = map->event_copy; - - do { - cpy = min(map->mask + 1 - (offset & map->mask), len); - memcpy(dst, &data[offset & map->mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = (union perf_event *)map->event_copy; - } - - *startp += size; - } - - return event; -} - -/* - * Read event from ring buffer one by one. - * Return one event for each call. - * - * Usage: - * perf_mmap__read_init() - * while(event = perf_mmap__read_event()) { - * //process the event - * perf_mmap__consume() - * } - * perf_mmap__read_done() - */ -union perf_event *perf_mmap__read_event(struct perf_mmap *map) -{ - union perf_event *event; - - /* - * Check if event was unmapped due to a POLLHUP/POLLERR. - */ - if (!refcount_read(&map->refcnt)) - return NULL; - - /* non-overwirte doesn't pause the ringbuffer */ - if (!map->overwrite) - map->end = perf_mmap__read_head(map); - - event = perf_mmap__read(map, &map->start, map->end); - - if (!map->overwrite) - map->prev = map->start; - - return event; -} diff --git a/tools/perf/lib/tests/Makefile b/tools/perf/lib/tests/Makefile deleted file mode 100644 index a43cd08c5c03..000000000000 --- a/tools/perf/lib/tests/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -TESTS = test-cpumap test-threadmap test-evlist test-evsel - -TESTS_SO := $(addsuffix -so,$(TESTS)) -TESTS_A := $(addsuffix -a,$(TESTS)) - -# Set compile option CFLAGS -ifdef EXTRA_CFLAGS - CFLAGS := $(EXTRA_CFLAGS) -else - CFLAGS := -g -Wall -endif - -all: - -include $(srctree)/tools/scripts/Makefile.include - -INCLUDE = -I$(srctree)/tools/perf/lib/include -I$(srctree)/tools/include -I$(srctree)/tools/lib - -$(TESTS_A): FORCE - $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -o $@ $(subst -a,.c,$@) ../libperf.a $(LIBAPI) - -$(TESTS_SO): FORCE - $(QUIET_LINK)$(CC) $(INCLUDE) $(CFLAGS) -L.. -o $@ $(subst -so,.c,$@) $(LIBAPI) -lperf - -all: $(TESTS_A) $(TESTS_SO) - -run: - @echo "running static:" - @for i in $(TESTS_A); do ./$$i; done - @echo "running dynamic:" - @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i; done - -clean: - $(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO) - -.PHONY: all clean FORCE diff --git a/tools/perf/lib/tests/test-cpumap.c b/tools/perf/lib/tests/test-cpumap.c deleted file mode 100644 index c8d45091e7c2..000000000000 --- a/tools/perf/lib/tests/test-cpumap.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdarg.h> -#include <stdio.h> -#include <perf/cpumap.h> -#include <internal/tests.h> - -static int libperf_print(enum libperf_print_level level, - const char *fmt, va_list ap) -{ - return vfprintf(stderr, fmt, ap); -} - -int main(int argc, char **argv) -{ - struct perf_cpu_map *cpus; - - __T_START; - - libperf_init(libperf_print); - - cpus = perf_cpu_map__dummy_new(); - if (!cpus) - return -1; - - perf_cpu_map__get(cpus); - perf_cpu_map__put(cpus); - perf_cpu_map__put(cpus); - - __T_END; - return 0; -} diff --git a/tools/perf/lib/tests/test-evlist.c b/tools/perf/lib/tests/test-evlist.c deleted file mode 100644 index 6d8ebe0c2504..000000000000 --- a/tools/perf/lib/tests/test-evlist.c +++ /dev/null @@ -1,413 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE // needed for sched.h to get sched_[gs]etaffinity and CPU_(ZERO,SET) -#include <sched.h> -#include <stdio.h> -#include <stdarg.h> -#include <unistd.h> -#include <stdlib.h> -#include <linux/perf_event.h> -#include <linux/limits.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/prctl.h> -#include <perf/cpumap.h> -#include <perf/threadmap.h> -#include <perf/evlist.h> -#include <perf/evsel.h> -#include <perf/mmap.h> -#include <perf/event.h> -#include <internal/tests.h> -#include <api/fs/fs.h> - -static int libperf_print(enum libperf_print_level level, - const char *fmt, va_list ap) -{ - return vfprintf(stderr, fmt, ap); -} - -static int test_stat_cpu(void) -{ - struct perf_cpu_map *cpus; - struct perf_evlist *evlist; - struct perf_evsel *evsel; - struct perf_event_attr attr1 = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - }; - struct perf_event_attr attr2 = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_TASK_CLOCK, - }; - int err, cpu, tmp; - - cpus = perf_cpu_map__new(NULL); - __T("failed to create cpus", cpus); - - evlist = perf_evlist__new(); - __T("failed to create evlist", evlist); - - evsel = perf_evsel__new(&attr1); - __T("failed to create evsel1", evsel); - - perf_evlist__add(evlist, evsel); - - evsel = perf_evsel__new(&attr2); - __T("failed to create evsel2", evsel); - - perf_evlist__add(evlist, evsel); - - perf_evlist__set_maps(evlist, cpus, NULL); - - err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); - - perf_evlist__for_each_evsel(evlist, evsel) { - cpus = perf_evsel__cpus(evsel); - - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { - struct perf_counts_values counts = { .val = 0 }; - - perf_evsel__read(evsel, cpu, 0, &counts); - __T("failed to read value for evsel", counts.val != 0); - } - } - - perf_evlist__close(evlist); - perf_evlist__delete(evlist); - - perf_cpu_map__put(cpus); - return 0; -} - -static int test_stat_thread(void) -{ - struct perf_counts_values counts = { .val = 0 }; - struct perf_thread_map *threads; - struct perf_evlist *evlist; - struct perf_evsel *evsel; - struct perf_event_attr attr1 = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - }; - struct perf_event_attr attr2 = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_TASK_CLOCK, - }; - int err; - - threads = perf_thread_map__new_dummy(); - __T("failed to create threads", threads); - - perf_thread_map__set_pid(threads, 0, 0); - - evlist = perf_evlist__new(); - __T("failed to create evlist", evlist); - - evsel = perf_evsel__new(&attr1); - __T("failed to create evsel1", evsel); - - perf_evlist__add(evlist, evsel); - - evsel = perf_evsel__new(&attr2); - __T("failed to create evsel2", evsel); - - perf_evlist__add(evlist, evsel); - - perf_evlist__set_maps(evlist, NULL, threads); - - err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); - - perf_evlist__for_each_evsel(evlist, evsel) { - perf_evsel__read(evsel, 0, 0, &counts); - __T("failed to read value for evsel", counts.val != 0); - } - - perf_evlist__close(evlist); - perf_evlist__delete(evlist); - - perf_thread_map__put(threads); - return 0; -} - -static int test_stat_thread_enable(void) -{ - struct perf_counts_values counts = { .val = 0 }; - struct perf_thread_map *threads; - struct perf_evlist *evlist; - struct perf_evsel *evsel; - struct perf_event_attr attr1 = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - .disabled = 1, - }; - struct perf_event_attr attr2 = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_TASK_CLOCK, - .disabled = 1, - }; - int err; - - threads = perf_thread_map__new_dummy(); - __T("failed to create threads", threads); - - perf_thread_map__set_pid(threads, 0, 0); - - evlist = perf_evlist__new(); - __T("failed to create evlist", evlist); - - evsel = perf_evsel__new(&attr1); - __T("failed to create evsel1", evsel); - - perf_evlist__add(evlist, evsel); - - evsel = perf_evsel__new(&attr2); - __T("failed to create evsel2", evsel); - - perf_evlist__add(evlist, evsel); - - perf_evlist__set_maps(evlist, NULL, threads); - - err = perf_evlist__open(evlist); - __T("failed to open evsel", err == 0); - - perf_evlist__for_each_evsel(evlist, evsel) { - perf_evsel__read(evsel, 0, 0, &counts); - __T("failed to read value for evsel", counts.val == 0); - } - - perf_evlist__enable(evlist); - - perf_evlist__for_each_evsel(evlist, evsel) { - perf_evsel__read(evsel, 0, 0, &counts); - __T("failed to read value for evsel", counts.val != 0); - } - - perf_evlist__disable(evlist); - - perf_evlist__close(evlist); - perf_evlist__delete(evlist); - - perf_thread_map__put(threads); - return 0; -} - -static int test_mmap_thread(void) -{ - struct perf_evlist *evlist; - struct perf_evsel *evsel; - struct perf_mmap *map; - struct perf_cpu_map *cpus; - struct perf_thread_map *threads; - struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, - .sample_period = 1, - .wakeup_watermark = 1, - .disabled = 1, - }; - char path[PATH_MAX]; - int id, err, pid, go_pipe[2]; - union perf_event *event; - char bf; - int count = 0; - - snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", - sysfs__mountpoint()); - - if (filename__read_int(path, &id)) { - fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); - return -1; - } - - attr.config = id; - - err = pipe(go_pipe); - __T("failed to create pipe", err == 0); - - fflush(NULL); - - pid = fork(); - if (!pid) { - int i; - - read(go_pipe[0], &bf, 1); - - /* Generate 100 prctl calls. */ - for (i = 0; i < 100; i++) - prctl(0, 0, 0, 0, 0); - - exit(0); - } - - threads = perf_thread_map__new_dummy(); - __T("failed to create threads", threads); - - cpus = perf_cpu_map__dummy_new(); - __T("failed to create cpus", cpus); - - perf_thread_map__set_pid(threads, 0, pid); - - evlist = perf_evlist__new(); - __T("failed to create evlist", evlist); - - evsel = perf_evsel__new(&attr); - __T("failed to create evsel1", evsel); - - perf_evlist__add(evlist, evsel); - - perf_evlist__set_maps(evlist, cpus, threads); - - err = perf_evlist__open(evlist); - __T("failed to open evlist", err == 0); - - err = perf_evlist__mmap(evlist, 4); - __T("failed to mmap evlist", err == 0); - - perf_evlist__enable(evlist); - - /* kick the child and wait for it to finish */ - write(go_pipe[1], &bf, 1); - waitpid(pid, NULL, 0); - - /* - * There's no need to call perf_evlist__disable, - * monitored process is dead now. - */ - - perf_evlist__for_each_mmap(evlist, map, false) { - if (perf_mmap__read_init(map) < 0) - continue; - - while ((event = perf_mmap__read_event(map)) != NULL) { - count++; - perf_mmap__consume(map); - } - - perf_mmap__read_done(map); - } - - /* calls perf_evlist__munmap/perf_evlist__close */ - perf_evlist__delete(evlist); - - perf_thread_map__put(threads); - perf_cpu_map__put(cpus); - - /* - * The generated prctl calls should match the - * number of events in the buffer. - */ - __T("failed count", count == 100); - - return 0; -} - -static int test_mmap_cpus(void) -{ - struct perf_evlist *evlist; - struct perf_evsel *evsel; - struct perf_mmap *map; - struct perf_cpu_map *cpus; - struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, - .sample_period = 1, - .wakeup_watermark = 1, - .disabled = 1, - }; - cpu_set_t saved_mask; - char path[PATH_MAX]; - int id, err, cpu, tmp; - union perf_event *event; - int count = 0; - - snprintf(path, PATH_MAX, "%s/kernel/debug/tracing/events/syscalls/sys_enter_prctl/id", - sysfs__mountpoint()); - - if (filename__read_int(path, &id)) { - fprintf(stderr, "error: failed to get tracepoint id: %s\n", path); - return -1; - } - - attr.config = id; - - cpus = perf_cpu_map__new(NULL); - __T("failed to create cpus", cpus); - - evlist = perf_evlist__new(); - __T("failed to create evlist", evlist); - - evsel = perf_evsel__new(&attr); - __T("failed to create evsel1", evsel); - - perf_evlist__add(evlist, evsel); - - perf_evlist__set_maps(evlist, cpus, NULL); - - err = perf_evlist__open(evlist); - __T("failed to open evlist", err == 0); - - err = perf_evlist__mmap(evlist, 4); - __T("failed to mmap evlist", err == 0); - - perf_evlist__enable(evlist); - - err = sched_getaffinity(0, sizeof(saved_mask), &saved_mask); - __T("sched_getaffinity failed", err == 0); - - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { - cpu_set_t mask; - - CPU_ZERO(&mask); - CPU_SET(cpu, &mask); - - err = sched_setaffinity(0, sizeof(mask), &mask); - __T("sched_setaffinity failed", err == 0); - - prctl(0, 0, 0, 0, 0); - } - - err = sched_setaffinity(0, sizeof(saved_mask), &saved_mask); - __T("sched_setaffinity failed", err == 0); - - perf_evlist__disable(evlist); - - perf_evlist__for_each_mmap(evlist, map, false) { - if (perf_mmap__read_init(map) < 0) - continue; - - while ((event = perf_mmap__read_event(map)) != NULL) { - count++; - perf_mmap__consume(map); - } - - perf_mmap__read_done(map); - } - - /* calls perf_evlist__munmap/perf_evlist__close */ - perf_evlist__delete(evlist); - - /* - * The generated prctl events should match the - * number of cpus or be bigger (we are system-wide). - */ - __T("failed count", count >= perf_cpu_map__nr(cpus)); - - perf_cpu_map__put(cpus); - - return 0; -} - -int main(int argc, char **argv) -{ - __T_START; - - libperf_init(libperf_print); - - test_stat_cpu(); - test_stat_thread(); - test_stat_thread_enable(); - test_mmap_thread(); - test_mmap_cpus(); - - __T_END; - return 0; -} diff --git a/tools/perf/lib/tests/test-evsel.c b/tools/perf/lib/tests/test-evsel.c deleted file mode 100644 index 135722ac965b..000000000000 --- a/tools/perf/lib/tests/test-evsel.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdarg.h> -#include <stdio.h> -#include <linux/perf_event.h> -#include <perf/cpumap.h> -#include <perf/threadmap.h> -#include <perf/evsel.h> -#include <internal/tests.h> - -static int libperf_print(enum libperf_print_level level, - const char *fmt, va_list ap) -{ - return vfprintf(stderr, fmt, ap); -} - -static int test_stat_cpu(void) -{ - struct perf_cpu_map *cpus; - struct perf_evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_CPU_CLOCK, - }; - int err, cpu, tmp; - - cpus = perf_cpu_map__new(NULL); - __T("failed to create cpus", cpus); - - evsel = perf_evsel__new(&attr); - __T("failed to create evsel", evsel); - - err = perf_evsel__open(evsel, cpus, NULL); - __T("failed to open evsel", err == 0); - - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { - struct perf_counts_values counts = { .val = 0 }; - - perf_evsel__read(evsel, cpu, 0, &counts); - __T("failed to read value for evsel", counts.val != 0); - } - - perf_evsel__close(evsel); - perf_evsel__delete(evsel); - - perf_cpu_map__put(cpus); - return 0; -} - -static int test_stat_thread(void) -{ - struct perf_counts_values counts = { .val = 0 }; - struct perf_thread_map *threads; - struct perf_evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_TASK_CLOCK, - }; - int err; - - threads = perf_thread_map__new_dummy(); - __T("failed to create threads", threads); - - perf_thread_map__set_pid(threads, 0, 0); - - evsel = perf_evsel__new(&attr); - __T("failed to create evsel", evsel); - - err = perf_evsel__open(evsel, NULL, threads); - __T("failed to open evsel", err == 0); - - perf_evsel__read(evsel, 0, 0, &counts); - __T("failed to read value for evsel", counts.val != 0); - - perf_evsel__close(evsel); - perf_evsel__delete(evsel); - - perf_thread_map__put(threads); - return 0; -} - -static int test_stat_thread_enable(void) -{ - struct perf_counts_values counts = { .val = 0 }; - struct perf_thread_map *threads; - struct perf_evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_TASK_CLOCK, - .disabled = 1, - }; - int err; - - threads = perf_thread_map__new_dummy(); - __T("failed to create threads", threads); - - perf_thread_map__set_pid(threads, 0, 0); - - evsel = perf_evsel__new(&attr); - __T("failed to create evsel", evsel); - - err = perf_evsel__open(evsel, NULL, threads); - __T("failed to open evsel", err == 0); - - perf_evsel__read(evsel, 0, 0, &counts); - __T("failed to read value for evsel", counts.val == 0); - - err = perf_evsel__enable(evsel); - __T("failed to enable evsel", err == 0); - - perf_evsel__read(evsel, 0, 0, &counts); - __T("failed to read value for evsel", counts.val != 0); - - err = perf_evsel__disable(evsel); - __T("failed to enable evsel", err == 0); - - perf_evsel__close(evsel); - perf_evsel__delete(evsel); - - perf_thread_map__put(threads); - return 0; -} - -int main(int argc, char **argv) -{ - __T_START; - - libperf_init(libperf_print); - - test_stat_cpu(); - test_stat_thread(); - test_stat_thread_enable(); - - __T_END; - return 0; -} diff --git a/tools/perf/lib/tests/test-threadmap.c b/tools/perf/lib/tests/test-threadmap.c deleted file mode 100644 index 7dc4d6fbedde..000000000000 --- a/tools/perf/lib/tests/test-threadmap.c +++ /dev/null @@ -1,31 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdarg.h> -#include <stdio.h> -#include <perf/threadmap.h> -#include <internal/tests.h> - -static int libperf_print(enum libperf_print_level level, - const char *fmt, va_list ap) -{ - return vfprintf(stderr, fmt, ap); -} - -int main(int argc, char **argv) -{ - struct perf_thread_map *threads; - - __T_START; - - libperf_init(libperf_print); - - threads = perf_thread_map__new_dummy(); - if (!threads) - return -1; - - perf_thread_map__get(threads); - perf_thread_map__put(threads); - perf_thread_map__put(threads); - - __T_END; - return 0; -} diff --git a/tools/perf/lib/threadmap.c b/tools/perf/lib/threadmap.c deleted file mode 100644 index e92c368b0a6c..000000000000 --- a/tools/perf/lib/threadmap.c +++ /dev/null @@ -1,91 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <perf/threadmap.h> -#include <stdlib.h> -#include <linux/refcount.h> -#include <internal/threadmap.h> -#include <string.h> -#include <asm/bug.h> -#include <stdio.h> - -static void perf_thread_map__reset(struct perf_thread_map *map, int start, int nr) -{ - size_t size = (nr - start) * sizeof(map->map[0]); - - memset(&map->map[start], 0, size); - map->err_thread = -1; -} - -struct perf_thread_map *perf_thread_map__realloc(struct perf_thread_map *map, int nr) -{ - size_t size = sizeof(*map) + sizeof(map->map[0]) * nr; - int start = map ? map->nr : 0; - - map = realloc(map, size); - /* - * We only realloc to add more items, let's reset new items. - */ - if (map) - perf_thread_map__reset(map, start, nr); - - return map; -} - -#define thread_map__alloc(__nr) perf_thread_map__realloc(NULL, __nr) - -void perf_thread_map__set_pid(struct perf_thread_map *map, int thread, pid_t pid) -{ - map->map[thread].pid = pid; -} - -char *perf_thread_map__comm(struct perf_thread_map *map, int thread) -{ - return map->map[thread].comm; -} - -struct perf_thread_map *perf_thread_map__new_dummy(void) -{ - struct perf_thread_map *threads = thread_map__alloc(1); - - if (threads != NULL) { - perf_thread_map__set_pid(threads, 0, -1); - threads->nr = 1; - refcount_set(&threads->refcnt, 1); - } - return threads; -} - -static void perf_thread_map__delete(struct perf_thread_map *threads) -{ - if (threads) { - int i; - - WARN_ONCE(refcount_read(&threads->refcnt) != 0, - "thread map refcnt unbalanced\n"); - for (i = 0; i < threads->nr; i++) - free(perf_thread_map__comm(threads, i)); - free(threads); - } -} - -struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map) -{ - if (map) - refcount_inc(&map->refcnt); - return map; -} - -void perf_thread_map__put(struct perf_thread_map *map) -{ - if (map && refcount_dec_and_test(&map->refcnt)) - perf_thread_map__delete(map); -} - -int perf_thread_map__nr(struct perf_thread_map *threads) -{ - return threads ? threads->nr : 1; -} - -pid_t perf_thread_map__pid(struct perf_thread_map *map, int thread) -{ - return map->map[thread].pid; -} diff --git a/tools/perf/lib/xyarray.c b/tools/perf/lib/xyarray.c deleted file mode 100644 index dcd901d154bb..000000000000 --- a/tools/perf/lib/xyarray.c +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <internal/xyarray.h> -#include <linux/zalloc.h> -#include <stdlib.h> -#include <string.h> - -struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size) -{ - size_t row_size = ylen * entry_size; - struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size); - - if (xy != NULL) { - xy->entry_size = entry_size; - xy->row_size = row_size; - xy->entries = xlen * ylen; - xy->max_x = xlen; - xy->max_y = ylen; - } - - return xy; -} - -void xyarray__reset(struct xyarray *xy) -{ - size_t n = xy->entries * xy->entry_size; - - memset(xy->contents, 0, n); -} - -void xyarray__delete(struct xyarray *xy) -{ - free(xy); -} diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json index 5e36bc2468d0..c998e4f1d1d2 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json @@ -4,27 +4,27 @@ "EventCode": "80", "EventName": "ECC_FUNCTION_COUNT", "BriefDescription": "ECC Function Count", - "PublicDescription": "Long ECC function Count" + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU." }, { "Unit": "CPU-M-CF", "EventCode": "81", "EventName": "ECC_CYCLES_COUNT", "BriefDescription": "ECC Cycles Count", - "PublicDescription": "Long ECC Function cycles count" + "PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU." }, { "Unit": "CPU-M-CF", "EventCode": "82", "EventName": "ECC_BLOCKED_FUNCTION_COUNT", "BriefDescription": "Ecc Blocked Function Count", - "PublicDescription": "Long ECC blocked function count" + "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU." }, { "Unit": "CPU-M-CF", "EventCode": "83", "EventName": "ECC_BLOCKED_CYCLES_COUNT", "BriefDescription": "ECC Blocked Cycles Count", - "PublicDescription": "Long ECC blocked cycles count" + "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU." }, ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 89e070727e1b..2df2e231e9ee 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -25,7 +25,7 @@ "EventCode": "131", "EventName": "DTLB2_HPAGE_WRITES", "BriefDescription": "DTLB2 One-Megabyte Page Writes", - "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done" + "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page" }, { "Unit": "CPU-M-CF", @@ -358,6 +358,34 @@ }, { "Unit": "CPU-M-CF", + "EventCode": "247", + "EventName": "DFLT_ACCESS", + "BriefDescription": "Cycles CPU spent obtaining access to Deflate unit", + "PublicDescription": "Cycles CPU spent obtaining access to Deflate unit" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "252", + "EventName": "DFLT_CYCLES", + "BriefDescription": "Cycles CPU is using Deflate unit", + "PublicDescription": "Cycles CPU is using Deflate unit" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "264", + "EventName": "DFLT_CC", + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed" + }, + { + "Unit": "CPU-M-CF", + "EventCode": "265", + "EventName": "DFLT_CCERROR", + "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2", + "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2" + }, + { + "Unit": "CPU-M-CF", "EventCode": "448", "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE", "BriefDescription": "Cycle count with one thread active", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index f94653229dd4..a728c6e5119b 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -215,7 +215,8 @@ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization" + "MetricName": "Page_Walks_Utilization", + "MetricConstraint": "NO_NMI_WATCHDOG" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json index e7feb60f9fa9..f97e8316ad2f 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -215,7 +215,8 @@ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization" + "MetricName": "Page_Walks_Utilization", + "MetricConstraint": "NO_NMI_WATCHDOG" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 21d7a0c2c2e8..35f5db1786f7 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -215,7 +215,8 @@ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization" + "MetricName": "Page_Walks_Utilization", + "MetricConstraint": "NO_NMI_WATCHDOG" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 079c77b6a2fd..3c4236a5bad8 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -323,7 +323,7 @@ static int print_events_table_entry(void *data, char *name, char *event, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated) + char *deprecated, char *metric_constraint) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -357,6 +357,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); if (deprecated) fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated); + if (metric_constraint) + fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint); fprintf(outfp, "},\n"); return 0; @@ -375,6 +377,7 @@ struct event_struct { char *metric_name; char *metric_group; char *deprecated; + char *metric_constraint; }; #define ADD_EVENT_FIELD(field) do { if (field) { \ @@ -422,7 +425,7 @@ static int save_arch_std_events(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated) + char *deprecated, char *metric_constraint) { struct event_struct *es; @@ -486,7 +489,7 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc, char **name, char **long_desc, char **pmu, char **filter, char **perpkg, char **unit, char **metric_expr, char **metric_name, char **metric_group, unsigned long long eventcode, - char **deprecated) + char **deprecated, char **metric_constraint) { /* try to find matching event from arch standard values */ struct event_struct *es; @@ -515,7 +518,7 @@ int json_events(const char *fn, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated), + char *deprecated, char *metric_constraint), void *data) { int err; @@ -545,6 +548,7 @@ int json_events(const char *fn, char *metric_name = NULL; char *metric_group = NULL; char *deprecated = NULL; + char *metric_constraint = NULL; char *arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; @@ -629,6 +633,8 @@ int json_events(const char *fn, addfield(map, &metric_name, "", "", val); } else if (json_streq(map, field, "MetricGroup")) { addfield(map, &metric_group, "", "", val); + } else if (json_streq(map, field, "MetricConstraint")) { + addfield(map, &metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) @@ -670,13 +676,13 @@ int json_events(const char *fn, &long_desc, &pmu, &filter, &perpkg, &unit, &metric_expr, &metric_name, &metric_group, eventcode, - &deprecated); + &deprecated, &metric_constraint); if (err) goto free_strings; } err = func(data, name, real_event(name, event), desc, long_desc, pmu, unit, perpkg, metric_expr, metric_name, - metric_group, deprecated); + metric_group, deprecated, metric_constraint); free_strings: free(event); free(desc); @@ -691,6 +697,7 @@ free_strings: free(metric_expr); free(metric_name); free(metric_group); + free(metric_constraint); free(arch_std); if (err) @@ -1082,10 +1089,9 @@ static int process_one_file(const char *fpath, const struct stat *sb, */ int main(int argc, char *argv[]) { - int rc; + int rc, ret = 0; int maxfds; char ldirname[PATH_MAX]; - const char *arch; const char *output_file; const char *start_dirname; @@ -1156,7 +1162,8 @@ int main(int argc, char *argv[]) /* Make build fail */ fclose(eventsfp); free_arch_std_events(); - return 1; + ret = 1; + goto out_free_mapfile; } else if (rc) { goto empty_map; } @@ -1174,14 +1181,17 @@ int main(int argc, char *argv[]) /* Make build fail */ fclose(eventsfp); free_arch_std_events(); - return 1; + ret = 1; } - return 0; + + goto out_free_mapfile; empty_map: fclose(eventsfp); create_empty_mapping(output_file); free_arch_std_events(); - return 0; +out_free_mapfile: + free(mapfile); + return ret; } diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 5cda49a42143..2afc8304529e 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -8,7 +8,7 @@ int json_events(const char *fn, char *pmu, char *unit, char *perpkg, char *metric_expr, char *metric_name, char *metric_group, - char *deprecated), + char *deprecated, char *metric_constraint), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index caeb577d36c9..53e76d5d5b37 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -18,6 +18,7 @@ struct pmu_event { const char *metric_name; const char *metric_group; const char *deprecated; + const char *metric_constraint; }; /* diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl index 4e7076c20616..d307ce8fd6ed 100644 --- a/tools/perf/scripts/perl/check-perf-trace.pl +++ b/tools/perf/scripts/perl/check-perf-trace.pl @@ -28,7 +28,7 @@ sub trace_end sub irq::softirq_entry { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $vec) = @_; print_header($event_name, $common_cpu, $common_secs, $common_nsecs, @@ -43,7 +43,7 @@ sub irq::softirq_entry sub kmem::kmalloc { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $call_site, $ptr, $bytes_req, $bytes_alloc, $gfp_flags) = @_; @@ -92,7 +92,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl index 55e7ae4c5c88..05954a8f363a 100644 --- a/tools/perf/scripts/perl/failed-syscalls.pl +++ b/tools/perf/scripts/perl/failed-syscalls.pl @@ -18,7 +18,7 @@ my %failed_syscalls; sub raw_syscalls::sys_exit { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $id, $ret) = @_; if ($ret < 0) { diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl index 168fa5e94b44..92a750b8552b 100644 --- a/tools/perf/scripts/perl/rw-by-file.pl +++ b/tools/perf/scripts/perl/rw-by-file.pl @@ -28,7 +28,7 @@ my %writes; sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; if ($common_comm eq $for_comm) { $reads{$fd}{bytes_requested} += $count; @@ -39,7 +39,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_; + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; if ($common_comm eq $for_comm) { $writes{$fd}{bytes_written} += $count; @@ -98,7 +98,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl index 495698250b2f..d789fe39caab 100644 --- a/tools/perf/scripts/perl/rw-by-pid.pl +++ b/tools/perf/scripts/perl/rw-by-pid.pl @@ -24,7 +24,7 @@ my %writes; sub syscalls::sys_exit_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; if ($ret > 0) { @@ -40,7 +40,7 @@ sub syscalls::sys_exit_read sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; $reads{$common_pid}{bytes_requested} += $count; @@ -51,7 +51,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_exit_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; if ($ret <= 0) { @@ -62,7 +62,7 @@ sub syscalls::sys_exit_write sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; $writes{$common_pid}{bytes_written} += $count; @@ -178,7 +178,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl index 6473442568a2..eba4df67af6b 100644 --- a/tools/perf/scripts/perl/rwtop.pl +++ b/tools/perf/scripts/perl/rwtop.pl @@ -35,7 +35,7 @@ if (!$interval) { sub syscalls::sys_exit_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; print_check(); @@ -53,7 +53,7 @@ sub syscalls::sys_exit_read sub syscalls::sys_enter_read { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; print_check(); @@ -66,7 +66,7 @@ sub syscalls::sys_enter_read sub syscalls::sys_exit_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $ret) = @_; print_check(); @@ -79,7 +79,7 @@ sub syscalls::sys_exit_write sub syscalls::sys_enter_write { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_; print_check(); @@ -197,7 +197,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl index efcfec5e347a..53444ff4ec7f 100644 --- a/tools/perf/scripts/perl/wakeup-latency.pl +++ b/tools/perf/scripts/perl/wakeup-latency.pl @@ -28,7 +28,7 @@ my $total_wakeups = 0; sub sched::sched_switch { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid, $next_prio) = @_; @@ -51,7 +51,7 @@ sub sched::sched_switch sub sched::sched_wakeup { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, + $common_pid, $common_comm, $common_callchain, $comm, $pid, $prio, $success, $target_cpu) = @_; $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs); @@ -101,7 +101,7 @@ sub print_unhandled sub trace_unhandled { my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; + $common_pid, $common_comm, $common_callchain) = @_; $unhandled{$event_name}++; } diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index d0b935356274..489b50604cf2 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -19,7 +19,7 @@ #include "../perf-sys.h" #include "cloexec.h" -volatile long the_var; +static volatile long the_var; static noinline int test_function(void) { diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index 415903b48578..da8ec1e8e064 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -263,20 +263,20 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused if (count1 == 11) pr_debug("failed: RF EFLAG recursion issue detected\n"); else - pr_debug("failed: wrong count for bp1%lld\n", count1); + pr_debug("failed: wrong count for bp1: %lld, expected 1\n", count1); } if (overflows != 3) - pr_debug("failed: wrong overflow hit\n"); + pr_debug("failed: wrong overflow (%d) hit, expected 3\n", overflows); if (overflows_2 != 3) - pr_debug("failed: wrong overflow_2 hit\n"); + pr_debug("failed: wrong overflow_2 (%d) hit, expected 3\n", overflows_2); if (count2 != 3) - pr_debug("failed: wrong count for bp2\n"); + pr_debug("failed: wrong count for bp2 (%lld), expected 3\n", count2); if (count3 != 2) - pr_debug("failed: wrong count for bp3\n"); + pr_debug("failed: wrong count for bp3 (%lld), expected 2\n", count3); return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ? TEST_OK : TEST_FAIL; diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 5f05db75cdd8..54d9516c9839 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -543,8 +543,11 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) return -1; dir = opendir(st.dir); - if (!dir) + if (!dir) { + pr_err("failed to open shell test directory: %s\n", + st.dir); return -1; + } for_each_shell_test(dir, st.dir, ent) { int curr = i++; diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 4ac56741ac5f..29c793ac7d10 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -131,7 +131,6 @@ int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_un TEST_ASSERT_VAL("failed to merge map: bad nr", c->nr == 5); cpu_map__snprint(c, buf, sizeof(buf)); TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7")); - perf_cpu_map__put(a); perf_cpu_map__put(b); perf_cpu_map__put(c); return 0; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 87843af4c118..28313e59d6f6 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -10,7 +10,7 @@ static int test(struct parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, &e)) + if (expr__parse(&val, ctx, e)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; @@ -44,12 +44,12 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) return ret; p = "FOO/0"; - ret = expr__parse(&val, &ctx, &p); - TEST_ASSERT_VAL("division by zero", ret == 1); + ret = expr__parse(&val, &ctx, p); + TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; - ret = expr__parse(&val, &ctx, &p); - TEST_ASSERT_VAL("missing operand", ret == 1); + ret = expr__parse(&val, &ctx, p); + TEST_ASSERT_VAL("missing operand", ret == -1); TEST_ASSERT_VAL("find other", expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0); diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 2762e1155238..14239e472187 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_BRANCH_STACK) { COMP(branch_stack->nr); + COMP(branch_stack->hw_idx); for (i = 0; i < s1->branch_stack->nr; i++) MCOMP(branch_stack->entries[i]); } @@ -186,7 +187,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) u64 data[64]; } branch_stack = { /* 1 branch_entry */ - .data = {1, 211, 212, 213}, + .data = {1, -1ULL, 211, 212, 213}, }; u64 regs[64]; const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; @@ -208,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .transaction = 112, .raw_data = (void *)raw_data, .callchain = &callchain.callchain, + .no_hw_idx = false, .branch_stack = &branch_stack.branch_stack, .user_regs = { .abi = PERF_SAMPLE_REGS_ABI_64, @@ -244,6 +246,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) if (sample_type & PERF_SAMPLE_REGS_INTR) evsel.core.attr.sample_regs_intr = sample_regs; + if (sample_type & PERF_SAMPLE_BRANCH_STACK) + evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; + for (i = 0; i < sizeof(regs); i++) *(i + (u8 *)regs) = i & 0xfe; diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh index 7cb99b433888..c2cc42daf924 100644 --- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh @@ -14,7 +14,7 @@ add_probe_vfs_getname() { if [ $had_vfs_getname -eq 1 ] ; then line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/') perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \ - perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:string" + perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring" fi } diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 5a61043c2ff7..d6dfe68a7612 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -213,6 +213,8 @@ size_t syscall_arg__scnprintf_x86_arch_prctl_code(char *bf, size_t size, struct size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PRCTL_OPTION syscall_arg__scnprintf_prctl_option +extern struct strarray strarray__prctl_options; + size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PRCTL_ARG2 syscall_arg__scnprintf_prctl_arg2 diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c index ba2179abed00..6fe5ad5f5d3a 100644 --- a/tools/perf/trace/beauty/prctl.c +++ b/tools/perf/trace/beauty/prctl.c @@ -11,9 +11,10 @@ #include "trace/beauty/generated/prctl_option_array.c" +DEFINE_STRARRAY(prctl_options, "PR_"); + static size_t prctl__scnprintf_option(int option, char *bf, size_t size, bool show_prefix) { - static DEFINE_STRARRAY(prctl_options, "PR_"); return strarray__scnprintf(&strarray__prctl_options, bf, size, "%d", show_prefix, option); } diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c index 173c8f760763..e0c13e6a5788 100644 --- a/tools/perf/trace/beauty/sockaddr.c +++ b/tools/perf/trace/beauty/sockaddr.c @@ -72,5 +72,5 @@ size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg if (arg->augmented.args) return syscall_arg__scnprintf_augmented_sockaddr(arg, bf, size); - return scnprintf(bf, size, "%#x", arg->val); + return scnprintf(bf, size, "%#lx", arg->val); } diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index badbddbb30f8..9023267e5643 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -754,10 +754,9 @@ static int annotate_browser__run(struct annotate_browser *browser, "? Search string backwards\n"); continue; case 'r': - { - script_browse(NULL, NULL); - continue; - } + script_browse(NULL, NULL); + annotate_browser__show(&browser->b, title, help); + continue; case 'k': notes->options->show_linenr = !notes->options->show_linenr; break; @@ -834,13 +833,13 @@ show_sup_ins: map_symbol__annotation_dump(ms, evsel, browser->opts); continue; case 't': - if (notes->options->show_total_period) { - notes->options->show_total_period = false; - notes->options->show_nr_samples = true; - } else if (notes->options->show_nr_samples) - notes->options->show_nr_samples = false; + if (symbol_conf.show_total_period) { + symbol_conf.show_total_period = false; + symbol_conf.show_nr_samples = true; + } else if (symbol_conf.show_nr_samples) + symbol_conf.show_nr_samples = false; else - notes->options->show_total_period = true; + symbol_conf.show_total_period = true; annotation__update_column_widths(notes); continue; case 'c': diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index d4d3558fdef4..f36dee499320 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -18,7 +18,9 @@ #include "../../util/evlist.h" #include "../../util/header.h" #include "../../util/hist.h" +#include "../../util/machine.h" #include "../../util/map.h" +#include "../../util/maps.h" #include "../../util/symbol.h" #include "../../util/map_symbol.h" #include "../../util/branch.h" @@ -391,6 +393,57 @@ static void hist_entry__init_have_children(struct hist_entry *he) he->init_have_children = true; } +static bool hist_browser__selection_has_children(struct hist_browser *browser) +{ + struct hist_entry *he = browser->he_selection; + struct map_symbol *ms = browser->selection; + + if (!he || !ms) + return false; + + if (ms == &he->ms) + return he->has_children; + + return container_of(ms, struct callchain_list, ms)->has_children; +} + +static bool hist_browser__he_selection_unfolded(struct hist_browser *browser) +{ + return browser->he_selection ? browser->he_selection->unfolded : false; +} + +static bool hist_browser__selection_unfolded(struct hist_browser *browser) +{ + struct hist_entry *he = browser->he_selection; + struct map_symbol *ms = browser->selection; + + if (!he || !ms) + return false; + + if (ms == &he->ms) + return he->unfolded; + + return container_of(ms, struct callchain_list, ms)->unfolded; +} + +static char *hist_browser__selection_sym_name(struct hist_browser *browser, char *bf, size_t size) +{ + struct hist_entry *he = browser->he_selection; + struct map_symbol *ms = browser->selection; + struct callchain_list *callchain_entry; + + if (!he || !ms) + return NULL; + + if (ms == &he->ms) { + hist_entry__sym_snprintf(he, bf, size, 0); + return bf + 4; // skip the level, e.g. '[k] ' + } + + callchain_entry = container_of(ms, struct callchain_list, ms); + return callchain_list__sym_name(callchain_entry, bf, size, browser->show_dso); +} + static bool hist_browser__toggle_fold(struct hist_browser *browser) { struct hist_entry *he = browser->he_selection; @@ -624,10 +677,81 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si return browser->title ? browser->title(browser, bf, size) : 0; } +static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, int key) +{ + switch (key) { + case K_TIMER: { + struct hist_browser_timer *hbt = browser->hbt; + u64 nr_entries; + + WARN_ON_ONCE(!hbt); + + if (hbt) + hbt->timer(hbt->arg); + + if (hist_browser__has_filter(browser) || symbol_conf.report_hierarchy) + hist_browser__update_nr_entries(browser); + + nr_entries = hist_browser__nr_entries(browser); + ui_browser__update_nr_entries(&browser->b, nr_entries); + + if (warn_lost_event && + (browser->hists->stats.nr_lost_warned != + browser->hists->stats.nr_events[PERF_RECORD_LOST])) { + browser->hists->stats.nr_lost_warned = + browser->hists->stats.nr_events[PERF_RECORD_LOST]; + ui_browser__warn_lost_events(&browser->b); + } + + hist_browser__title(browser, title, sizeof(title)); + ui_browser__show_title(&browser->b, title); + break; + } + case 'D': { /* Debug */ + struct hist_entry *h = rb_entry(browser->b.top, struct hist_entry, rb_node); + static int seq; + + ui_helpline__pop(); + ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d", + seq++, browser->b.nr_entries, browser->hists->nr_entries, + browser->b.extra_title_lines, browser->b.rows, + browser->b.index, browser->b.top_idx, h->row_offset, h->nr_rows); + } + break; + case 'C': + /* Collapse the whole world. */ + hist_browser__set_folding(browser, false); + break; + case 'c': + /* Collapse the selected entry. */ + hist_browser__set_folding_selected(browser, false); + break; + case 'E': + /* Expand the whole world. */ + hist_browser__set_folding(browser, true); + break; + case 'e': + /* Expand the selected entry. */ + hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser)); + break; + case 'H': + browser->show_headers = !browser->show_headers; + hist_browser__update_rows(browser); + break; + case '+': + if (hist_browser__toggle_fold(browser)) + break; + /* fall thru */ + default: + return -1; + } + + return 0; +} + int hist_browser__run(struct hist_browser *browser, const char *help, - bool warn_lost_event) + bool warn_lost_event, int key) { - int key; char title[160]; struct hist_browser_timer *hbt = browser->hbt; int delay_secs = hbt ? hbt->refresh : 0; @@ -640,79 +764,14 @@ int hist_browser__run(struct hist_browser *browser, const char *help, if (ui_browser__show(&browser->b, title, "%s", help) < 0) return -1; + if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, key)) + goto out; + while (1) { key = ui_browser__run(&browser->b, delay_secs); - switch (key) { - case K_TIMER: { - u64 nr_entries; - - WARN_ON_ONCE(!hbt); - - if (hbt) - hbt->timer(hbt->arg); - - if (hist_browser__has_filter(browser) || - symbol_conf.report_hierarchy) - hist_browser__update_nr_entries(browser); - - nr_entries = hist_browser__nr_entries(browser); - ui_browser__update_nr_entries(&browser->b, nr_entries); - - if (warn_lost_event && - (browser->hists->stats.nr_lost_warned != - browser->hists->stats.nr_events[PERF_RECORD_LOST])) { - browser->hists->stats.nr_lost_warned = - browser->hists->stats.nr_events[PERF_RECORD_LOST]; - ui_browser__warn_lost_events(&browser->b); - } - - hist_browser__title(browser, title, sizeof(title)); - ui_browser__show_title(&browser->b, title); - continue; - } - case 'D': { /* Debug */ - static int seq; - struct hist_entry *h = rb_entry(browser->b.top, - struct hist_entry, rb_node); - ui_helpline__pop(); - ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d", - seq++, browser->b.nr_entries, - browser->hists->nr_entries, - browser->b.extra_title_lines, - browser->b.rows, - browser->b.index, - browser->b.top_idx, - h->row_offset, h->nr_rows); - } - break; - case 'C': - /* Collapse the whole world. */ - hist_browser__set_folding(browser, false); - break; - case 'c': - /* Collapse the selected entry. */ - hist_browser__set_folding_selected(browser, false); + if (hist_browser__handle_hotkey(browser, warn_lost_event, title, key)) break; - case 'E': - /* Expand the whole world. */ - hist_browser__set_folding(browser, true); - break; - case 'e': - /* Expand the selected entry. */ - hist_browser__set_folding_selected(browser, true); - break; - case 'H': - browser->show_headers = !browser->show_headers; - hist_browser__update_rows(browser); - break; - case K_ENTER: - if (hist_browser__toggle_fold(browser)) - break; - /* fall thru */ - default: - goto out; - } } out: ui_browser__hide(&browser->b); @@ -2339,7 +2398,7 @@ close_file_and_continue: closedir(pwd_dir); if (nr_options) { - choice = ui__popup_menu(nr_options, options); + choice = ui__popup_menu(nr_options, options, NULL); if (choice < nr_options && choice >= 0) { tmp = strdup(abs_path[choice]); if (tmp) { @@ -2411,7 +2470,8 @@ add_annotate_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, struct map_symbol *ms) { - if (ms->sym == NULL || ms->map->dso->annotate_warned) + if (ms->sym == NULL || ms->map->dso->annotate_warned || + symbol__annotation(ms->sym)->src == NULL) return 0; if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0) @@ -2484,11 +2544,8 @@ add_thread_opt(struct hist_browser *browser, struct popup_action *act, return 1; } -static int -do_zoom_dso(struct hist_browser *browser, struct popup_action *act) +static int hists_browser__zoom_map(struct hist_browser *browser, struct map *map) { - struct map *map = act->ms.map; - if (!hists__has(browser->hists, dso) || map == NULL) return 0; @@ -2511,13 +2568,19 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) } static int +do_zoom_dso(struct hist_browser *browser, struct popup_action *act) +{ + return hists_browser__zoom_map(browser, act->ms.map); +} + +static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { if (!hists__has(browser->hists, dso) || map == NULL) return 0; - if (asprintf(optstr, "Zoom %s %s DSO", + if (asprintf(optstr, "Zoom %s %s DSO (use the 'k' hotkey to zoom directly into the kernel)", browser->hists->dso_filter ? "out of" : "into", __map__is_kernel(map) ? "the Kernel" : map->dso->short_name) < 0) return 0; @@ -2527,6 +2590,28 @@ add_dso_opt(struct hist_browser *browser, struct popup_action *act, return 1; } +static int do_toggle_callchain(struct hist_browser *browser, struct popup_action *act __maybe_unused) +{ + hist_browser__toggle_fold(browser); + return 0; +} + +static int add_callchain_toggle_opt(struct hist_browser *browser, struct popup_action *act, char **optstr) +{ + char sym_name[512]; + + if (!hist_browser__selection_has_children(browser)) + return 0; + + if (asprintf(optstr, "%s [%s] callchain (one level, same as '+' hotkey, use 'e'/'c' for the whole main level entry)", + hist_browser__selection_unfolded(browser) ? "Collapse" : "Expand", + hist_browser__selection_sym_name(browser, sym_name, sizeof(sym_name))) < 0) + return 0; + + act->fn = do_toggle_callchain; + return 1; +} + static int do_browse_map(struct hist_browser *browser __maybe_unused, struct popup_action *act) @@ -2858,12 +2943,15 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, "For symbolic views (--sort has sym):\n\n" \ "ENTER Zoom into DSO/Threads & Annotate current symbol\n" \ "ESC Zoom out\n" \ + "+ Expand/Collapse one callchain level\n" \ "a Annotate current symbol\n" \ "C Collapse all callchains\n" \ "d Zoom into current DSO\n" \ + "e Expand/Collapse main entry callchains\n" \ "E Expand all callchains\n" \ "F Toggle percentage of filtered entries\n" \ "H Display column headers\n" \ + "k Zoom into the kernel map\n" \ "L Change percent limit\n" \ "m Display context menu\n" \ "S Zoom into current Processor Socket\n" \ @@ -2914,13 +3002,13 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, while (1) { struct thread *thread = NULL; struct map *map = NULL; - int choice = 0; + int choice; int socked_id = -1; - nr_options = 0; - - key = hist_browser__run(browser, helpline, - warn_lost_event); + key = 0; // reset key +do_hotkey: // key came straight from options ui__popup_menu() + choice = nr_options = 0; + key = hist_browser__run(browser, helpline, warn_lost_event, key); if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); @@ -2950,6 +3038,14 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, browser->selection->map->dso->annotate_warned) continue; + if (symbol__annotation(browser->selection->sym)->src == NULL) { + ui_browser__warning(&browser->b, delay_secs * 2, + "No samples for the \"%s\" symbol.\n\n" + "Probably appeared just in a callchain", + browser->selection->sym->name); + continue; + } + actions->ms.map = browser->selection->map; actions->ms.sym = browser->selection->sym; do_annotate(browser, actions); @@ -2961,6 +3057,10 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, actions->ms.map = map; do_zoom_dso(browser, actions); continue; + case 'k': + if (browser->selection != NULL) + hists_browser__zoom_map(browser, browser->selection->maps->machine->vmlinux_map); + continue; case 'V': verbose = (verbose + 1) % 4; browser->show_dso = verbose > 0; @@ -3062,6 +3162,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, continue; } + actions->ms.map = map; top = pstack__peek(browser->pstack); if (top == &browser->hists->dso_filter) { /* @@ -3135,6 +3236,7 @@ skip_annotation: &options[nr_options], thread); nr_options += add_dso_opt(browser, &actions[nr_options], &options[nr_options], map); + nr_options += add_callchain_toggle_opt(browser, &actions[nr_options], &options[nr_options]); nr_options += add_map_opt(browser, &actions[nr_options], &options[nr_options], browser->selection ? @@ -3193,10 +3295,13 @@ skip_scripting: do { struct popup_action *act; - choice = ui__popup_menu(nr_options, options); - if (choice == -1 || choice >= nr_options) + choice = ui__popup_menu(nr_options, options, &key); + if (choice == -1) break; + if (choice == nr_options) + goto do_hotkey; + act = &actions[choice]; key = act->fn(browser, act); } while (key == 1); @@ -3492,7 +3597,7 @@ int block_hists_tui_browse(struct block_hist *bh, struct evsel *evsel, memset(&action, 0, sizeof(action)); while (1) { - key = hist_browser__run(browser, "? - help", true); + key = hist_browser__run(browser, "? - help", true, 0); switch (key) { case 'q': diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 078f2f2c7abd..1e938d9ffa5e 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -34,7 +34,7 @@ struct hist_browser { struct hist_browser *hist_browser__new(struct hists *hists); void hist_browser__delete(struct hist_browser *browser); int hist_browser__run(struct hist_browser *browser, const char *help, - bool warn_lost_event); + bool warn_lost_event, int key); void hist_browser__init(struct hist_browser *browser, struct hists *hists); #endif /* _PERF_UI_BROWSER_HISTS_H_ */ diff --git a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c index 76d356a18790..7cb2d6678039 100644 --- a/tools/perf/ui/browsers/res_sample.c +++ b/tools/perf/ui/browsers/res_sample.c @@ -56,7 +56,7 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, return -1; } } - choice = ui__popup_menu(num_res, names); + choice = ui__popup_menu(num_res, names, NULL); for (i = 0; i < num_res; i++) zfree(&names[i]); free(names); diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index fc733a6354d4..47d2c7a8cbe1 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -126,7 +126,7 @@ static int list_scripts(char *script_name, bool *custom, SCRIPT_FULLPATH_LEN); if (num < 0) num = 0; - choice = ui__popup_menu(num + max_std, (char * const *)names); + choice = ui__popup_menu(num + max_std, (char * const *)names, NULL); if (choice < 0) { ret = -1; goto out; diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build index ec22e899a224..eef708c502f4 100644 --- a/tools/perf/ui/gtk/Build +++ b/tools/perf/ui/gtk/Build @@ -1,4 +1,4 @@ -CFLAGS_gtk += -fPIC $(GTK_CFLAGS) +CFLAGS_gtk += -fPIC $(GTK_CFLAGS) -Wno-deprecated-declarations gtk-y += browser.o gtk-y += hists.o @@ -7,3 +7,8 @@ gtk-y += util.o gtk-y += helpline.o gtk-y += progress.o gtk-y += annotate.o +gtk-y += zalloc.o + +$(OUTPUT)ui/gtk/zalloc.o: ../lib/zalloc.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 22cc240f7371..35f9641bf670 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -174,7 +174,7 @@ static int symbol__gtk_annotate(struct map_symbol *ms, struct evsel *evsel, if (ms->map->dso->annotate_warned) return -1; - err = symbol__annotate(ms, evsel, 0, &annotation__default_options, NULL); + err = symbol__annotate(ms, evsel, &annotation__default_options, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(ms, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index b98dd0e31dc1..0f562e2cb1e8 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c @@ -23,7 +23,7 @@ static void ui_browser__argv_write(struct ui_browser *browser, ui_browser__write_nstring(browser, *arg, browser->width); } -static int popup_menu__run(struct ui_browser *menu) +static int popup_menu__run(struct ui_browser *menu, int *keyp) { int key; @@ -45,6 +45,11 @@ static int popup_menu__run(struct ui_browser *menu) key = -1; break; default: + if (keyp) { + *keyp = key; + key = menu->nr_entries; + break; + } continue; } @@ -55,7 +60,7 @@ static int popup_menu__run(struct ui_browser *menu) return key; } -int ui__popup_menu(int argc, char * const argv[]) +int ui__popup_menu(int argc, char * const argv[], int *keyp) { struct ui_browser menu = { .entries = (void *)argv, @@ -64,8 +69,7 @@ int ui__popup_menu(int argc, char * const argv[]) .write = ui_browser__argv_write, .nr_entries = argc, }; - - return popup_menu__run(&menu); + return popup_menu__run(&menu, keyp); } int ui_browser__input_window(const char *title, const char *text, char *input, diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h index 40891942f465..e30cea807564 100644 --- a/tools/perf/ui/util.h +++ b/tools/perf/ui/util.h @@ -5,7 +5,7 @@ #include <stdarg.h> int ui__getch(int delay_secs); -int ui__popup_menu(int argc, char * const argv[]); +int ui__popup_menu(int argc, char * const argv[], int *keyp); int ui__help_window(const char *text); int ui__dialog_yesno(const char *msg); void __ui__info_window(const char *title, const char *text, const char *exit_msg); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 07da6c790b63..c0cf8dff694e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -121,7 +121,9 @@ perf-y += mem-events.o perf-y += vsprintf.o perf-y += units.o perf-y += time-utils.o +perf-y += expr-flex.o perf-y += expr-bison.o +perf-y += expr.o perf-y += branch.o perf-y += mem2node.o @@ -189,9 +191,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y $(call rule_mkdir) $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ +$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c + $(call rule_mkdir) + $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l + $(OUTPUT)util/expr-bison.c: util/expr.y $(call rule_mkdir) - $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__ + $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_ $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c $(call rule_mkdir) @@ -203,12 +209,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y CFLAGS_parse-events-flex.o += -w CFLAGS_pmu-flex.o += -w +CFLAGS_expr-flex.o += -w CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c +$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" @@ -216,6 +224,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_parse-events.o += -Wno-redundant-decls +CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f5e77ed237e8..f1ea0d61eb5b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1143,93 +1143,70 @@ out: } struct annotate_args { - size_t privsize; - struct arch *arch; - struct map_symbol ms; - struct evsel *evsel; + struct arch *arch; + struct map_symbol ms; + struct evsel *evsel; struct annotation_options *options; - s64 offset; - char *line; - int line_nr; + s64 offset; + char *line; + int line_nr; }; -static void annotation_line__delete(struct annotation_line *al) +static void annotation_line__init(struct annotation_line *al, + struct annotate_args *args, + int nr) { - void *ptr = (void *) al - al->privsize; + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + al->data_nr = nr; +} +static void annotation_line__exit(struct annotation_line *al) +{ free_srcline(al->path); zfree(&al->line); - free(ptr); } -/* - * Allocating the annotation line data with following - * structure: - * - * -------------------------------------- - * private space | struct annotation_line - * -------------------------------------- - * - * Size of the private space is stored in 'struct annotation_line'. - * - */ -static struct annotation_line * -annotation_line__new(struct annotate_args *args, size_t privsize) +static size_t disasm_line_size(int nr) { struct annotation_line *al; - struct evsel *evsel = args->evsel; - size_t size = privsize + sizeof(*al); - int nr = 1; - - if (perf_evsel__is_group_event(evsel)) - nr = evsel->core.nr_members; - - size += sizeof(al->data[0]) * nr; - al = zalloc(size); - if (al) { - al = (void *) al + privsize; - al->privsize = privsize; - al->offset = args->offset; - al->line = strdup(args->line); - al->line_nr = args->line_nr; - al->data_nr = nr; - } - - return al; + return (sizeof(struct disasm_line) + (sizeof(al->data[0]) * nr)); } /* * Allocating the disasm annotation line data with * following structure: * - * ------------------------------------------------------------ - * privsize space | struct disasm_line | struct annotation_line - * ------------------------------------------------------------ + * ------------------------------------------- + * struct disasm_line | struct annotation_line + * ------------------------------------------- * * We have 'struct annotation_line' member as last member * of 'struct disasm_line' to have an easy access. - * */ static struct disasm_line *disasm_line__new(struct annotate_args *args) { struct disasm_line *dl = NULL; - struct annotation_line *al; - size_t privsize = args->privsize + offsetof(struct disasm_line, al); + int nr = 1; - al = annotation_line__new(args, privsize); - if (al != NULL) { - dl = disasm_line(al); + if (perf_evsel__is_group_event(args->evsel)) + nr = args->evsel->core.nr_members; - if (dl->al.line == NULL) - goto out_delete; + dl = zalloc(disasm_line_size(nr)); + if (!dl) + return NULL; - if (args->offset != -1) { - if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) - goto out_free_line; + annotation_line__init(&dl->al, args, nr); + if (dl->al.line == NULL) + goto out_delete; - disasm_line__init_ins(dl, args->arch, &args->ms); - } + if (args->offset != -1) { + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) + goto out_free_line; + + disasm_line__init_ins(dl, args->arch, &args->ms); } return dl; @@ -1248,7 +1225,8 @@ void disasm_line__free(struct disasm_line *dl) else ins__delete(&dl->ops); zfree(&dl->ins.name); - annotation_line__delete(&dl->al); + annotation_line__exit(&dl->al); + free(dl); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw, int max_ins_name) @@ -1966,14 +1944,20 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 - " -l -d %s %s -C \"$1\"", + " -l -d %s %s %s %c%s%c %s%s -C \"$1\"", opts->objdump_path ?: "objdump", opts->disassembler_style ? "-M " : "", opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), opts->show_asm_raw ? "" : "--no-show-raw-insn", - opts->annotate_src ? "-S" : ""); + opts->annotate_src ? "-S" : "", + opts->prefix ? "--prefix " : "", + opts->prefix ? '"' : ' ', + opts->prefix ?: "", + opts->prefix ? '"' : ' ', + opts->prefix_strip ? "--prefix-strip=" : "", + opts->prefix_strip ?: ""); if (err < 0) { pr_err("Failure allocating memory for the command to run\n"); @@ -2143,13 +2127,12 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, size_t privsize, +int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, struct annotation_options *options, struct arch **parch) { struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); struct annotate_args args = { - .privsize = privsize, .evsel = evsel, .options = options, }; @@ -2628,8 +2611,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) if (++al->jump_sources > notes->max_jump_sources) notes->max_jump_sources = al->jump_sources; - - ++notes->nr_jumps; } } @@ -2638,6 +2619,8 @@ void annotation__set_offsets(struct annotation *notes, s64 size) struct annotation_line *al; notes->max_line_len = 0; + notes->nr_entries = 0; + notes->nr_asm_entries = 0; list_for_each_entry(al, ¬es->src->source, node) { size_t line_len = strlen(al->line); @@ -2784,7 +2767,7 @@ int symbol__tty_annotate(struct map_symbol *ms, struct evsel *evsel, struct symbol *sym = ms->sym; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(ms, evsel, 0, opts, NULL) < 0) + if (symbol__annotate(ms, evsel, opts, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); @@ -2909,9 +2892,9 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati percent = annotation_data__percent(&al->data[i], percent_type); obj__set_percent_color(obj, percent, current_entry); - if (notes->options->show_total_period) { + if (symbol_conf.show_total_period) { obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period); - } else if (notes->options->show_nr_samples) { + } else if (symbol_conf.show_nr_samples) { obj__printf(obj, "%6" PRIu64 " ", al->data[i].he.nr_samples); } else { @@ -2925,8 +2908,8 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati obj__printf(obj, "%-*s", pcnt_width, " "); else { obj__printf(obj, "%-*s", pcnt_width, - notes->options->show_total_period ? "Period" : - notes->options->show_nr_samples ? "Samples" : "Percent"); + symbol_conf.show_total_period ? "Period" : + symbol_conf.show_nr_samples ? "Samples" : "Percent"); } } @@ -3064,7 +3047,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel, if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->core.nr_members; - err = symbol__annotate(ms, evsel, 0, options, parch); + err = symbol__annotate(ms, evsel, options, parch); if (err) goto out_free_offsets; @@ -3088,69 +3071,46 @@ out_free_offsets: return err; } -#define ANNOTATION__CFG(n) \ - { .name = #n, .value = &annotation__default_options.n, } - -/* - * Keep the entries sorted, they are bsearch'ed - */ -static struct annotation_config { - const char *name; - void *value; -} annotation__configs[] = { - ANNOTATION__CFG(hide_src_code), - ANNOTATION__CFG(jump_arrows), - ANNOTATION__CFG(offset_level), - ANNOTATION__CFG(show_linenr), - ANNOTATION__CFG(show_nr_jumps), - ANNOTATION__CFG(show_nr_samples), - ANNOTATION__CFG(show_total_period), - ANNOTATION__CFG(use_offset), -}; - -#undef ANNOTATION__CFG - -static int annotation_config__cmp(const void *name, const void *cfgp) +static int annotation__config(const char *var, const char *value, void *data) { - const struct annotation_config *cfg = cfgp; - - return strcmp(name, cfg->name); -} - -static int annotation__config(const char *var, const char *value, - void *data __maybe_unused) -{ - struct annotation_config *cfg; - const char *name; + struct annotation_options *opt = data; if (!strstarts(var, "annotate.")) return 0; - name = var + 9; - cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs), - sizeof(struct annotation_config), annotation_config__cmp); - - if (cfg == NULL) - pr_debug("%s variable unknown, ignoring...", var); - else if (strcmp(var, "annotate.offset_level") == 0) { - perf_config_int(cfg->value, name, value); - - if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL) - *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL; - else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL) - *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL; + if (!strcmp(var, "annotate.offset_level")) { + perf_config_u8(&opt->offset_level, "offset_level", value); + + if (opt->offset_level > ANNOTATION__MAX_OFFSET_LEVEL) + opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL; + else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) + opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + } else if (!strcmp(var, "annotate.hide_src_code")) { + opt->hide_src_code = perf_config_bool("hide_src_code", value); + } else if (!strcmp(var, "annotate.jump_arrows")) { + opt->jump_arrows = perf_config_bool("jump_arrows", value); + } else if (!strcmp(var, "annotate.show_linenr")) { + opt->show_linenr = perf_config_bool("show_linenr", value); + } else if (!strcmp(var, "annotate.show_nr_jumps")) { + opt->show_nr_jumps = perf_config_bool("show_nr_jumps", value); + } else if (!strcmp(var, "annotate.show_nr_samples")) { + symbol_conf.show_nr_samples = perf_config_bool("show_nr_samples", + value); + } else if (!strcmp(var, "annotate.show_total_period")) { + symbol_conf.show_total_period = perf_config_bool("show_total_period", + value); + } else if (!strcmp(var, "annotate.use_offset")) { + opt->use_offset = perf_config_bool("use_offset", value); } else { - *(bool *)cfg->value = perf_config_bool(name, value); + pr_debug("%s variable unknown, ignoring...", var); } + return 0; } -void annotation_config__init(void) +void annotation_config__init(struct annotation_options *opt) { - perf_config(annotation__config, NULL); - - annotation__default_options.show_total_period = symbol_conf.show_total_period; - annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples; + perf_config(annotation__config, opt); } static unsigned int parse_percent_type(char *str1, char *str2) @@ -3204,3 +3164,12 @@ out: free(str1); return err; } + +int annotate_check_args(struct annotation_options *args) +{ + if (args->prefix_strip && !args->prefix) { + pr_err("--prefix-strip requires --prefix\n"); + return -1; + } + return 0; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7075d98f69d9..07c775938d46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -83,8 +83,6 @@ struct annotation_options { full_path, show_linenr, show_nr_jumps, - show_nr_samples, - show_total_period, show_minmax_cycle, show_asm_raw, annotate_src; @@ -94,6 +92,8 @@ struct annotation_options { int context; const char *objdump_path; const char *disassembler_style; + const char *prefix; + const char *prefix_strip; unsigned int percent_type; }; @@ -139,7 +139,6 @@ struct annotation_line { u64 cycles; u64 cycles_max; u64 cycles_min; - size_t privsize; char *path; u32 idx; int idx_asm; @@ -280,7 +279,6 @@ struct annotation { struct annotation_options *options; struct annotation_line **offsets; int nr_events; - int nr_jumps; int max_jump_sources; int nr_entries; int nr_asm_entries; @@ -307,7 +305,7 @@ static inline int annotation__cycles_width(struct annotation *notes) static inline int annotation__pcnt_width(struct annotation *notes) { - return (notes->options->show_total_period ? 12 : 7) * notes->nr_events; + return (symbol_conf.show_total_period ? 12 : 7) * notes->nr_events; } static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes) @@ -350,7 +348,7 @@ struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct map_symbol *ms, - struct evsel *evsel, size_t privsize, + struct evsel *evsel, struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct map_symbol *ms, @@ -411,8 +409,11 @@ static inline int symbol__tui_annotate(struct map_symbol *ms __maybe_unused, } #endif -void annotation_config__init(void); +void annotation_config__init(struct annotation_options *opt); int annotate_parse_percent_type(const struct option *opt, const char *_str, int unset); + +int annotate_check_args(struct annotation_options *args); + #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index eb087e7df6f4..3571ce72ca28 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -629,8 +629,10 @@ int auxtrace_record__options(struct auxtrace_record *itr, struct evlist *evlist, struct record_opts *opts) { - if (itr) + if (itr) { + itr->evlist = evlist; return itr->recording_options(itr, evlist, opts); + } return 0; } @@ -664,6 +666,24 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, return -EINVAL; } +int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx) +{ + struct evsel *evsel; + + if (!itr->evlist || !itr->pmu) + return -EINVAL; + + evlist__for_each_entry(itr->evlist, evsel) { + if (evsel->core.attr.type == itr->pmu->type) { + if (evsel->disabled) + return 0; + return perf_evlist__enable_event_idx(itr->evlist, evsel, + idx); + } + } + return -EINVAL; +} + /* * Event record size is 16-bit which results in a maximum size of about 64KiB. * Allow about 4KiB for the rest of the sample record, to give a maximum diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 749d72cd9c7b..e58ef160b599 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -29,6 +29,7 @@ struct record_opts; struct perf_record_auxtrace_error; struct perf_record_auxtrace_info; struct events_stats; +struct perf_pmu; enum auxtrace_error_type { PERF_AUXTRACE_ERROR_ITRACE = 1, @@ -322,6 +323,8 @@ struct auxtrace_mmap_params { * @read_finish: called after reading from an auxtrace mmap * @alignment: alignment (if any) for AUX area data * @default_aux_sample_size: default sample size for --aux sample option + * @pmu: associated pmu + * @evlist: selected events list */ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, @@ -346,6 +349,8 @@ struct auxtrace_record { int (*read_finish)(struct auxtrace_record *itr, int idx); unsigned int alignment; unsigned int default_aux_sample_size; + struct perf_pmu *pmu; + struct evlist *evlist; }; /** @@ -537,6 +542,7 @@ int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx, struct auxtrace_mmap *mm, unsigned char *data, u64 *head, u64 *old); u64 auxtrace_record__reference(struct auxtrace_record *itr); +int auxtrace_record__read_finish(struct auxtrace_record *itr, int idx); int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event, off_t file_offset); diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index c4b030bf6ec2..423ec69bda6c 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -65,8 +65,7 @@ struct block_info *block_info__new(void) return bi; } -int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, - struct hist_entry *left, struct hist_entry *right) +int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right) { struct block_info *bi_l = left->block_info; struct block_info *bi_r = right->block_info; @@ -74,30 +73,27 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, if (!bi_l->sym || !bi_r->sym) { if (!bi_l->sym && !bi_r->sym) - return 0; + return -1; else if (!bi_l->sym) return -1; else return 1; } - if (bi_l->sym == bi_r->sym) { - if (bi_l->start == bi_r->start) { - if (bi_l->end == bi_r->end) - return 0; - else - return (int64_t)(bi_r->end - bi_l->end); - } else - return (int64_t)(bi_r->start - bi_l->start); - } else { - cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + cmp = strcmp(bi_l->sym->name, bi_r->sym->name); + if (cmp) return cmp; - } - if (bi_l->sym->start != bi_r->sym->start) - return (int64_t)(bi_r->sym->start - bi_l->sym->start); + if (bi_l->start != bi_r->start) + return (int64_t)(bi_r->start - bi_l->start); - return (int64_t)(bi_r->sym->end - bi_l->sym->end); + return (int64_t)(bi_r->end - bi_l->end); +} + +int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return __block_info__cmp(left, right); } static void init_block_info(struct block_info *bi, struct symbol *sym, @@ -185,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt, return block_fmt->width; } +static int color_pct(struct perf_hpp *hpp, int width, double pct) +{ +#ifdef HAVE_SLANG_SUPPORT + if (use_browser) { + return __hpp__slsmg_color_printf(hpp, "%*.2f%%", + width - 1, pct); + } +#endif + return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct); +} + static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) @@ -192,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt); struct block_info *bi = he->block_info; double ratio = 0.0; - char buf[16]; if (block_fmt->total_cycles) ratio = (double)bi->cycles / (double)block_fmt->total_cycles; - sprintf(buf, "%.2f%%", 100.0 * ratio); - - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + return color_pct(hpp, block_fmt->width, 100.0 * ratio); } static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt, @@ -252,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt, struct block_info *bi = he->block_info; double ratio = 0.0; u64 avg; - char buf[16]; if (block_fmt->block_cycles && bi->num_aggr) { avg = bi->cycles_aggr / bi->num_aggr; ratio = (double)avg / (double)block_fmt->block_cycles; } - sprintf(buf, "%.2f%%", 100.0 * ratio); - - return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf); + return color_pct(hpp, block_fmt->width, 100.0 * ratio); } static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt, @@ -295,7 +296,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s]", start_line, end_line); } else { @@ -348,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, switch (idx) { case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT: - fmt->entry = block_total_cycles_pct_entry; + fmt->color = block_total_cycles_pct_entry; fmt->cmp = block_info__cmp; fmt->sort = block_total_cycles_pct_sort; break; @@ -356,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, fmt->entry = block_cycles_lbr_entry; break; case PERF_HPP_REPORT__BLOCK_CYCLES_PCT: - fmt->entry = block_cycles_pct_entry; + fmt->color = block_cycles_pct_entry; break; case PERF_HPP_REPORT__BLOCK_AVG_CYCLES: fmt->entry = block_avg_cycles_entry; @@ -376,33 +378,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx, } static void register_block_columns(struct perf_hpp_list *hpp_list, - struct block_fmt *block_fmts) + struct block_fmt *block_fmts, + int *block_hpps, int nr_hpps) { - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) - hpp_register(&block_fmts[i], i, hpp_list); + for (int i = 0; i < nr_hpps; i++) + hpp_register(&block_fmts[i], block_hpps[i], hpp_list); } -static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts) +static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts, + int *block_hpps, int nr_hpps) { __hists__init(&bh->block_hists, &bh->block_list); perf_hpp_list__init(&bh->block_list); bh->block_list.nr_header_lines = 1; - register_block_columns(&bh->block_list, block_fmts); + register_block_columns(&bh->block_list, block_fmts, + block_hpps, nr_hpps); - perf_hpp_list__register_sort_field(&bh->block_list, - &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt); + /* Sort by the first fmt */ + perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt); } -static void process_block_report(struct hists *hists, - struct block_report *block_report, - u64 total_cycles) +static int process_block_report(struct hists *hists, + struct block_report *block_report, + u64 total_cycles, int *block_hpps, + int nr_hpps) { struct rb_node *next = rb_first_cached(&hists->entries); struct block_hist *bh = &block_report->hist; struct hist_entry *he; - init_block_hist(bh, block_report->fmts); + if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX) + return -1; + + block_report->nr_fmts = nr_hpps; + init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps); while (next) { he = rb_entry(next, struct hist_entry, rb_node); @@ -411,16 +421,19 @@ static void process_block_report(struct hists *hists, next = rb_next(&he->rb_node); } - for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) { + for (int i = 0; i < nr_hpps; i++) { block_report->fmts[i].total_cycles = total_cycles; block_report->fmts[i].block_cycles = block_report->cycles; } hists__output_resort(&bh->block_hists, NULL); + return 0; } struct block_report *block_info__create_report(struct evlist *evlist, - u64 total_cycles) + u64 total_cycles, + int *block_hpps, int nr_hpps, + int *nr_reps) { struct block_report *block_reports; int nr_hists = evlist->core.nr_entries, i = 0; @@ -433,13 +446,23 @@ struct block_report *block_info__create_report(struct evlist *evlist, evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); - process_block_report(hists, &block_reports[i], total_cycles); + process_block_report(hists, &block_reports[i], total_cycles, + block_hpps, nr_hpps); i++; } + *nr_reps = nr_hists; return block_reports; } +void block_info__free_report(struct block_report *reps, int nr_reps) +{ + for (int i = 0; i < nr_reps; i++) + hists__delete_entries(&reps[i].hist.block_hists); + + free(reps); +} + int report__browse_block_hists(struct block_hist *bh, float min_percent, struct evsel *evsel, struct perf_env *env, struct annotation_options *annotation_opts) @@ -451,13 +474,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent, symbol_conf.report_individual_block = true; hists__fprintf(&bh->block_hists, true, 0, 0, min_percent, stdout, true); - hists__delete_entries(&bh->block_hists); return 0; case 1: symbol_conf.report_individual_block = true; ret = block_hists_tui_browse(bh, evsel, min_percent, env, annotation_opts); - hists__delete_entries(&bh->block_hists); return ret; default: return -1; diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h index bef0d75e9819..42e9dcc4cf0a 100644 --- a/tools/perf/util/block-info.h +++ b/tools/perf/util/block-info.h @@ -45,6 +45,7 @@ struct block_report { struct block_hist hist; u64 cycles; struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX]; + int nr_fmts; }; struct block_hist; @@ -61,6 +62,8 @@ static inline void __block_info__zput(struct block_info **bi) #define block_info__zput(bi) __block_info__zput(&bi) +int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right); + int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused, struct hist_entry *left, struct hist_entry *right); @@ -68,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh, u64 *block_cycles_aggr, u64 total_cycles); struct block_report *block_info__create_report(struct evlist *evlist, - u64 total_cycles); + u64 total_cycles, + int *block_hpps, int nr_hpps, + int *nr_reps); + +void block_info__free_report(struct block_report *reps, int nr_reps); int report__browse_block_hists(struct block_hist *bh, float min_percent, struct evsel *evsel, struct perf_env *env, diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 88e00d268f6f..154a05cd03af 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -12,6 +12,7 @@ #include <linux/stddef.h> #include <linux/perf_event.h> #include <linux/types.h> +#include "event.h" struct branch_flags { u64 mispred:1; @@ -39,9 +40,30 @@ struct branch_entry { struct branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries[0]; }; +/* + * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied. + * Otherwise, the output format of a sample with branch stack is + * struct branch_stack { + * u64 nr; + * struct branch_entry entries[0]; + * } + * Check whether the hw_idx is available, + * and return the corresponding pointer of entries[0]. + */ +static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample) +{ + u64 *entry = (u64 *)sample->branch_stack; + + entry++; + if (sample->no_hw_idx) + return (struct branch_entry *)entry; + return (struct branch_entry *)(++entry); +} + struct branch_type_stat { bool branch_to; u64 counts[PERF_BR_MAX]; diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index fc361c3f8570..c8885dfa3667 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -71,7 +71,11 @@ getModuleFromSource(llvm::opt::ArgStringList CFlags, CompilerInstance Clang; Clang.createDiagnostics(); +#if CLANG_VERSION_MAJOR < 9 Clang.setVirtualFileSystem(&*VFS); +#else + Clang.createFileManager(&*VFS); +#endif #if CLANG_VERSION_MAJOR < 4 IntrusiveRefCntPtr<CompilerInvocation> CI = diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 4881d4af3381..5bc9d3b01bd9 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -3,75 +3,16 @@ #include "evsel.h" #include "cgroup.h" #include "evlist.h" -#include <linux/stringify.h> #include <linux/zalloc.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdlib.h> #include <string.h> +#include <api/fs/fs.h> int nr_cgroups; -static int -cgroupfs_find_mountpoint(char *buf, size_t maxlen) -{ - FILE *fp; - char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1]; - char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path; - char *token, *saved_ptr = NULL; - - fp = fopen("/proc/mounts", "r"); - if (!fp) - return -1; - - /* - * in order to handle split hierarchy, we need to scan /proc/mounts - * and inspect every cgroupfs mount point to find one that has - * perf_event subsystem - */ - path_v1[0] = '\0'; - path_v2[0] = '\0'; - - while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %" - __stringify(PATH_MAX)"s %*d %*d\n", - mountpoint, type, tokens) == 3) { - - if (!path_v1[0] && !strcmp(type, "cgroup")) { - - token = strtok_r(tokens, ",", &saved_ptr); - - while (token != NULL) { - if (!strcmp(token, "perf_event")) { - strcpy(path_v1, mountpoint); - break; - } - token = strtok_r(NULL, ",", &saved_ptr); - } - } - - if (!path_v2[0] && !strcmp(type, "cgroup2")) - strcpy(path_v2, mountpoint); - - if (path_v1[0] && path_v2[0]) - break; - } - fclose(fp); - - if (path_v1[0]) - path = path_v1; - else if (path_v2[0]) - path = path_v2; - else - return -1; - - if (strlen(path) < maxlen) { - strcpy(buf, path); - return 0; - } - return -1; -} - static int open_cgroup(const char *name) { char path[PATH_MAX + 1]; @@ -79,7 +20,7 @@ static int open_cgroup(const char *name) int fd; - if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1)) + if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event")) return -1; scnprintf(path, PATH_MAX, "%s/%s", mnt, name); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 0bc9c4d7fdc5..ef38eba56ed0 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -374,6 +374,18 @@ int perf_config_int(int *dest, const char *name, const char *value) return 0; } +int perf_config_u8(u8 *dest, const char *name, const char *value) +{ + long ret = 0; + + if (!perf_parse_long(value, &ret)) { + bad_config(name); + return -1; + } + *dest = ret; + return 0; +} + static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) { int ret; diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index bd0a5897c76a..c10b66dde2f3 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -29,6 +29,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_int(int *dest, const char *, const char *); +int perf_config_u8(u8 *dest, const char *name, const char *value); int perf_config_u64(u64 *dest, const char *, const char *); int perf_config_bool(const char *, const char *); int config_error_nonbool(const char *); diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 5471045ebf5c..62d2f9b9ce1b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -363,6 +363,23 @@ struct cs_etm_packet_queue return NULL; } +static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, + struct cs_etm_traceid_queue *tidq) +{ + struct cs_etm_packet *tmp; + + if (etm->sample_branches || etm->synth_opts.last_branch || + etm->sample_instructions) { + /* + * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for + * the next incoming packet. + */ + tmp = tidq->packet; + tidq->packet = tidq->prev_packet; + tidq->prev_packet = tmp; + } +} + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -945,7 +962,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, if (packet->isa == CS_ETM_ISA_T32) { u64 addr = packet->start_addr; - while (offset > 0) { + while (offset) { addr += cs_etm__t32_instr_size(etmq, trace_chan_id, addr); offset--; @@ -1134,10 +1151,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample); - if (etm->synth_opts.last_branch) { - cs_etm__copy_last_branch_rb(etmq, tidq); + if (etm->synth_opts.last_branch) sample.branch_stack = tidq->last_branch; - } if (etm->synth_opts.inject) { ret = cs_etm__inject_event(event, &sample, @@ -1153,9 +1168,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, "CS ETM Trace: failed to deliver instruction event, error %d\n", ret); - if (etm->synth_opts.last_branch) - cs_etm__reset_last_branch_rb(tidq); - return ret; } @@ -1172,6 +1184,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, union perf_event *event = tidq->event_buf; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; u64 ip; @@ -1202,6 +1215,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, if (etm->synth_opts.last_branch) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, @@ -1340,12 +1354,14 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq) { struct cs_etm_auxtrace *etm = etmq->etm; - struct cs_etm_packet *tmp; int ret; u8 trace_chan_id = tidq->trace_chan_id; - u64 instrs_executed = tidq->packet->instr_count; + u64 instrs_prev; + + /* Get instructions remainder from previous packet */ + instrs_prev = tidq->period_instructions; - tidq->period_instructions += instrs_executed; + tidq->period_instructions += tidq->packet->instr_count; /* * Record a branch when the last instruction in @@ -1363,26 +1379,80 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, * TODO: allow period to be defined in cycles and clock time */ - /* Get number of instructions executed after the sample point */ - u64 instrs_over = tidq->period_instructions - - etm->instructions_sample_period; + /* + * Below diagram demonstrates the instruction samples + * generation flows: + * + * Instrs Instrs Instrs Instrs + * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) + * | | | | + * V V V V + * -------------------------------------------------- + * ^ ^ + * | | + * Period Period + * instructions(Pi) instructions(Pi') + * + * | | + * \---------------- -----------------/ + * V + * tidq->packet->instr_count + * + * Instrs Sample(n...) are the synthesised samples occurring + * every etm->instructions_sample_period instructions - as + * defined on the perf command line. Sample(n) is being the + * last sample before the current etm packet, n+1 to n+3 + * samples are generated from the current etm packet. + * + * tidq->packet->instr_count represents the number of + * instructions in the current etm packet. + * + * Period instructions (Pi) contains the the number of + * instructions executed after the sample point(n) from the + * previous etm packet. This will always be less than + * etm->instructions_sample_period. + * + * When generate new samples, it combines with two parts + * instructions, one is the tail of the old packet and another + * is the head of the new coming packet, to generate + * sample(n+1); sample(n+2) and sample(n+3) consume the + * instructions with sample period. After sample(n+3), the rest + * instructions will be used by later packet and it is assigned + * to tidq->period_instructions for next round calculation. + */ /* - * Calculate the address of the sampled instruction (-1 as - * sample is reported as though instruction has just been - * executed, but PC has not advanced to next instruction) + * Get the initial offset into the current packet instructions; + * entry conditions ensure that instrs_prev is less than + * etm->instructions_sample_period. */ - u64 offset = (instrs_executed - instrs_over - 1); - u64 addr = cs_etm__instr_addr(etmq, trace_chan_id, - tidq->packet, offset); + u64 offset = etm->instructions_sample_period - instrs_prev; + u64 addr; - ret = cs_etm__synth_instruction_sample( - etmq, tidq, addr, etm->instructions_sample_period); - if (ret) - return ret; + /* Prepare last branches for instruction sample */ + if (etm->synth_opts.last_branch) + cs_etm__copy_last_branch_rb(etmq, tidq); - /* Carry remaining instructions into next sample period */ - tidq->period_instructions = instrs_over; + while (tidq->period_instructions >= + etm->instructions_sample_period) { + /* + * Calculate the address of the sampled instruction (-1 + * as sample is reported as though instruction has just + * been executed, but PC has not advanced to next + * instruction) + */ + addr = cs_etm__instr_addr(etmq, trace_chan_id, + tidq->packet, offset - 1); + ret = cs_etm__synth_instruction_sample( + etmq, tidq, addr, + etm->instructions_sample_period); + if (ret) + return ret; + + offset += etm->instructions_sample_period; + tidq->period_instructions -= + etm->instructions_sample_period; + } } if (etm->sample_branches) { @@ -1404,15 +1474,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, } } - if (etm->sample_branches || etm->synth_opts.last_branch) { - /* - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for - * the next incoming packet. - */ - tmp = tidq->packet; - tidq->packet = tidq->prev_packet; - tidq->prev_packet = tmp; - } + cs_etm__packet_swap(etm, tidq); return 0; } @@ -1441,7 +1503,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, { int err = 0; struct cs_etm_auxtrace *etm = etmq->etm; - struct cs_etm_packet *tmp; /* Handle start tracing packet */ if (tidq->prev_packet->sample_type == CS_ETM_EMPTY) @@ -1449,6 +1510,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, if (etmq->etm->synth_opts.last_branch && tidq->prev_packet->sample_type == CS_ETM_RANGE) { + u64 addr; + + /* Prepare last branches for instruction sample */ + cs_etm__copy_last_branch_rb(etmq, tidq); + /* * Generate a last branch event for the branches left in the * circular buffer at the end of the trace. @@ -1456,7 +1522,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, * Use the address of the end of the last reported execution * range */ - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); + addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( etmq, tidq, addr, @@ -1476,15 +1542,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, } swap_packet: - if (etm->sample_branches || etm->synth_opts.last_branch) { - /* - * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for - * the next incoming packet. - */ - tmp = tidq->packet; - tidq->packet = tidq->prev_packet; - tidq->prev_packet = tmp; - } + cs_etm__packet_swap(etm, tidq); + + /* Reset last branches after flush the trace */ + if (etm->synth_opts.last_branch) + cs_etm__reset_last_branch_rb(tidq); return err; } @@ -1505,11 +1567,16 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, */ if (etmq->etm->synth_opts.last_branch && tidq->prev_packet->sample_type == CS_ETM_RANGE) { + u64 addr; + + /* Prepare last branches for instruction sample */ + cs_etm__copy_last_branch_rb(etmq, tidq); + /* * Use the address of the end of the last reported execution * range. */ - u64 addr = cs_etm__last_executed_instr(tidq->prev_packet); + addr = cs_etm__last_executed_instr(tidq->prev_packet); err = cs_etm__synth_instruction_sample( etmq, tidq, addr, diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6242a9215df7..4154f944f474 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -343,11 +343,11 @@ static const char *normalize_arch(char *arch) const char *perf_env__arch(struct perf_env *env) { - struct utsname uts; char *arch_name; if (!env || !env->arch) { /* Assume local operation */ - if (uname(&uts) < 0) + static struct utsname uts = { .machine[0] = '\0', }; + if (uts.machine[0] == '\0' && uname(&uts) < 0) return NULL; arch_name = uts.machine; } else diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 85223159737c..3cda40a2fafc 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -139,6 +139,7 @@ struct perf_sample { u16 insn_len; u8 cpumode; u16 misc; + bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a69e64236120..816d930d774e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -712,7 +712,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel, attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_CALL_STACK | PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS; + PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_HW_INDEX; } } else pr_warning("Cannot use LBR callstack with branch stack. " @@ -763,7 +764,8 @@ perf_evsel__reset_callgraph(struct evsel *evsel, if (param->record_mode == CALLCHAIN_LBR) { perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | - PERF_SAMPLE_BRANCH_CALL_STACK); + PERF_SAMPLE_BRANCH_CALL_STACK | + PERF_SAMPLE_BRANCH_HW_INDEX); } if (param->record_mode == CALLCHAIN_DWARF) { perf_evsel__reset_sample_bit(evsel, REGS_USER); @@ -808,12 +810,12 @@ static void apply_config_terms(struct evsel *evsel, perf_evsel__reset_sample_bit(evsel, TIME); break; case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: - callgraph_buf = term->val.callgraph; + callgraph_buf = term->val.str; break; case PERF_EVSEL__CONFIG_TERM_BRANCH: - if (term->val.branch && strcmp(term->val.branch, "no")) { + if (term->val.str && strcmp(term->val.str, "no")) { perf_evsel__set_sample_bit(evsel, BRANCH_STACK); - parse_branch_str(term->val.branch, + parse_branch_str(term->val.str, &attr->branch_sample_type); } else perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); @@ -1265,6 +1267,8 @@ static void perf_evsel__free_config_terms(struct evsel *evsel) list_for_each_entry_safe(term, h, &evsel->config_terms, list) { list_del_init(&term->list); + if (term->free_str) + zfree(&term->val.str); free(term); } } @@ -1671,6 +1675,8 @@ fallback_missing_features: evsel->core.attr.ksymbol = 0; if (perf_missing_features.bpf) evsel->core.attr.bpf_event = 0; + if (perf_missing_features.branch_hw_idx) + evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->core.attr.sample_id_all = 0; @@ -1782,7 +1788,12 @@ try_fallback: * Must probe features in the order they were added to the * perf_event_attr interface. */ - if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { + if (!perf_missing_features.branch_hw_idx && + (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + goto fallback_missing_features; + } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { perf_missing_features.aux_output = true; pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); goto out_close; @@ -2167,7 +2178,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (data->branch_stack->nr > max_branch_nr) return -EFAULT; + sz = data->branch_stack->nr * sizeof(struct branch_entry); + if (perf_evsel__has_branch_hw_idx(evsel)) + sz += sizeof(u64); + else + data->no_hw_idx = true; OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dc14f4a823cd..33804740e2ca 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -119,6 +119,7 @@ struct perf_missing_features { bool ksymbol; bool bpf; bool aux_output; + bool branch_hw_idx; }; extern struct perf_missing_features perf_missing_features; @@ -389,6 +390,11 @@ static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel) return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } +static inline bool perf_evsel__has_branch_hw_idx(const struct evsel *evsel) +{ + return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX; +} + static inline bool evsel__has_callchain(const struct evsel *evsel) { return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index 1f8d2fe0b66e..e026ab67b008 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -32,22 +32,21 @@ enum evsel_term_type { struct perf_evsel_config_term { struct list_head list; enum evsel_term_type type; + bool free_str; union { u64 period; u64 freq; bool time; - char *callgraph; - char *drv_cfg; u64 stack_user; int max_stack; bool inherit; bool overwrite; - char *branch; unsigned long max_events; bool percore; bool aux_output; u32 aux_sample_size; u64 cfg_chg; + char *str; } val; bool weak; }; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c new file mode 100644 index 000000000000..fd192ddf93c1 --- /dev/null +++ b/tools/perf/util/expr.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdbool.h> +#include <assert.h> +#include "expr.h" +#include "expr-bison.h" +#define YY_EXTRA_TYPE int +#include "expr-flex.h" + +#ifdef PARSER_DEBUG +extern int expr_debug; +#endif + +/* Caller must make sure id is allocated */ +void expr__add_id(struct parse_ctx *ctx, const char *name, double val) +{ + int idx; + + assert(ctx->num_ids < MAX_PARSE_ID); + idx = ctx->num_ids++; + ctx->ids[idx].name = name; + ctx->ids[idx].val = val; +} + +void expr__ctx_init(struct parse_ctx *ctx) +{ + ctx->num_ids = 0; +} + +static int +__expr__parse(double *val, struct parse_ctx *ctx, const char *expr, + int start) +{ + YY_BUFFER_STATE buffer; + void *scanner; + int ret; + + ret = expr_lex_init_extra(start, &scanner); + if (ret) + return ret; + + buffer = expr__scan_string(expr, scanner); + +#ifdef PARSER_DEBUG + expr_debug = 1; +#endif + + ret = expr_parse(val, ctx, scanner); + + expr__flush_buffer(buffer, scanner); + expr__delete_buffer(buffer, scanner); + expr_lex_destroy(scanner); + return ret; +} + +int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr) +{ + return __expr__parse(final_val, ctx, expr, EXPR_PARSE) ? -1 : 0; +} + +static bool +already_seen(const char *val, const char *one, const char **other, + int num_other) +{ + int i; + + if (one && !strcasecmp(one, val)) + return true; + for (i = 0; i < num_other; i++) + if (!strcasecmp(other[i], val)) + return true; + return false; +} + +int expr__find_other(const char *expr, const char *one, const char ***other, + int *num_other) +{ + int err, i = 0, j = 0; + struct parse_ctx ctx; + + expr__ctx_init(&ctx); + err = __expr__parse(NULL, &ctx, expr, EXPR_OTHER); + if (err) + return -1; + + *other = malloc((ctx.num_ids + 1) * sizeof(char *)); + if (!*other) + return -ENOMEM; + + for (i = 0, j = 0; i < ctx.num_ids; i++) { + const char *str = ctx.ids[i].name; + + if (already_seen(str, one, *other, j)) + continue; + + str = strdup(str); + if (!str) + goto out; + (*other)[j++] = str; + } + (*other)[j] = NULL; + +out: + if (i != ctx.num_ids) { + while (--j) + free((char *) (*other)[i]); + free(*other); + err = -1; + } + + *num_other = j; + return err; +} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 046160831f90..9377538f4097 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,7 +2,7 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#define EXPR_MAX_OTHER 15 +#define EXPR_MAX_OTHER 20 #define MAX_PARSE_ID EXPR_MAX_OTHER struct parse_id { @@ -17,10 +17,8 @@ struct parse_ctx { void expr__ctx_init(struct parse_ctx *ctx); void expr__add_id(struct parse_ctx *ctx, const char *id, double val); -#ifndef IN_EXPR_Y -int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp); -#endif -int expr__find_other(const char *p, const char *one, const char ***other, +int expr__parse(double *final_val, struct parse_ctx *ctx, const char *expr); +int expr__find_other(const char *expr, const char *one, const char ***other, int *num_other); #endif diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l new file mode 100644 index 000000000000..eaad29243c23 --- /dev/null +++ b/tools/perf/util/expr.l @@ -0,0 +1,114 @@ +%option prefix="expr_" +%option reentrant +%option bison-bridge + +%{ +#include <linux/compiler.h> +#include "expr.h" +#include "expr-bison.h" + +char *expr_get_text(yyscan_t yyscanner); +YYSTYPE *expr_get_lval(yyscan_t yyscanner); + +static int __value(YYSTYPE *yylval, char *str, int base, int token) +{ + u64 num; + + errno = 0; + num = strtoull(str, NULL, base); + if (errno) + return EXPR_ERROR; + + yylval->num = num; + return token; +} + +static int value(yyscan_t scanner, int base) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + char *text = expr_get_text(scanner); + + return __value(yylval, text, base, NUMBER); +} + +/* + * Allow @ instead of / to be able to specify pmu/event/ without + * conflicts with normal division. + */ +static char *normalize(char *str) +{ + char *ret = str; + char *dst = str; + + while (*str) { + if (*str == '@') + *dst++ = '/'; + else if (*str == '\\') + *dst++ = *++str; + else + *dst++ = *str; + str++; + } + + *dst = 0x0; + return ret; +} + +static int str(yyscan_t scanner, int token) +{ + YYSTYPE *yylval = expr_get_lval(scanner); + char *text = expr_get_text(scanner); + + yylval->str = normalize(strdup(text)); + if (!yylval->str) + return EXPR_ERROR; + + yylval->str = normalize(yylval->str); + return token; +} +%} + +number [0-9]+ + +sch [-,=] +spec \\{sch} +sym [0-9a-zA-Z_\.:@]+ +symbol {spec}*{sym}*{spec}*{sym}* + +%% + { + int start_token; + + start_token = expr_get_extra(yyscanner); + + if (start_token) { + expr_set_extra(NULL, yyscanner); + return start_token; + } + } + +max { return MAX; } +min { return MIN; } +if { return IF; } +else { return ELSE; } +#smt_on { return SMT_ON; } +{number} { return value(yyscanner, 10); } +{symbol} { return str(yyscanner, ID); } +"|" { return '|'; } +"^" { return '^'; } +"&" { return '&'; } +"-" { return '-'; } +"+" { return '+'; } +"*" { return '*'; } +"/" { return '/'; } +"%" { return '%'; } +"(" { return '('; } +")" { return ')'; } +"," { return ','; } +. { } +%% + +int expr_wrap(void *scanner __maybe_unused) +{ + return 1; +} diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index f9a20a39b64a..4720cbe79357 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,30 +1,32 @@ /* Simple expression parser */ %{ +#define YYDEBUG 1 +#include <stdio.h> #include "util.h" #include "util/debug.h" #include <stdlib.h> // strtod() #define IN_EXPR_Y 1 #include "expr.h" #include "smt.h" -#include <assert.h> #include <string.h> -#define MAXIDLEN 256 %} -%pure-parser +%define api.pure full + %parse-param { double *final_val } %parse-param { struct parse_ctx *ctx } -%parse-param { const char **pp } -%lex-param { const char **pp } +%parse-param {void *scanner} +%lex-param {void* scanner} %union { - double num; - char id[MAXIDLEN+1]; + double num; + char *str; } +%token EXPR_PARSE EXPR_OTHER EXPR_ERROR %token <num> NUMBER -%token <id> ID +%token <str> ID %token MIN MAX IF ELSE SMT_ON %left MIN MAX IF %left '|' @@ -36,11 +38,9 @@ %type <num> expr if_expr %{ -static int expr__lex(YYSTYPE *res, const char **pp); - -static void expr__error(double *final_val __maybe_unused, +static void expr_error(double *final_val __maybe_unused, struct parse_ctx *ctx __maybe_unused, - const char **pp __maybe_unused, + void *scanner, const char *s) { pr_debug("%s\n", s); @@ -62,6 +62,27 @@ static int lookup_id(struct parse_ctx *ctx, char *id, double *val) %} %% +start: +EXPR_PARSE all_expr +| +EXPR_OTHER all_other + +all_other: all_other other +| + +other: ID +{ + if (ctx->num_ids + 1 >= EXPR_MAX_OTHER) { + pr_err("failed: way too many variables"); + YYABORT; + } + + ctx->ids[ctx->num_ids++].name = $1; +} +| +MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' + + all_expr: if_expr { *final_val = $1; } ; @@ -92,146 +113,3 @@ expr: NUMBER ; %% - -static int expr__symbol(YYSTYPE *res, const char *p, const char **pp) -{ - char *dst = res->id; - const char *s = p; - - if (*p == '#') - *dst++ = *p++; - - while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') { - if (p - s >= MAXIDLEN) - return -1; - /* - * Allow @ instead of / to be able to specify pmu/event/ without - * conflicts with normal division. - */ - if (*p == '@') - *dst++ = '/'; - else if (*p == '\\') - *dst++ = *++p; - else - *dst++ = *p; - p++; - } - *dst = 0; - *pp = p; - dst = res->id; - switch (dst[0]) { - case 'm': - if (!strcmp(dst, "min")) - return MIN; - if (!strcmp(dst, "max")) - return MAX; - break; - case 'i': - if (!strcmp(dst, "if")) - return IF; - break; - case 'e': - if (!strcmp(dst, "else")) - return ELSE; - break; - case '#': - if (!strcasecmp(dst, "#smt_on")) - return SMT_ON; - break; - } - return ID; -} - -static int expr__lex(YYSTYPE *res, const char **pp) -{ - int tok; - const char *s; - const char *p = *pp; - - while (isspace(*p)) - p++; - s = p; - switch (*p++) { - case '#': - case 'a' ... 'z': - case 'A' ... 'Z': - return expr__symbol(res, p - 1, pp); - case '0' ... '9': case '.': - res->num = strtod(s, (char **)&p); - tok = NUMBER; - break; - default: - tok = *s; - break; - } - *pp = p; - return tok; -} - -/* Caller must make sure id is allocated */ -void expr__add_id(struct parse_ctx *ctx, const char *name, double val) -{ - int idx; - assert(ctx->num_ids < MAX_PARSE_ID); - idx = ctx->num_ids++; - ctx->ids[idx].name = name; - ctx->ids[idx].val = val; -} - -void expr__ctx_init(struct parse_ctx *ctx) -{ - ctx->num_ids = 0; -} - -static bool already_seen(const char *val, const char *one, const char **other, - int num_other) -{ - int i; - - if (one && !strcasecmp(one, val)) - return true; - for (i = 0; i < num_other; i++) - if (!strcasecmp(other[i], val)) - return true; - return false; -} - -int expr__find_other(const char *p, const char *one, const char ***other, - int *num_otherp) -{ - const char *orig = p; - int err = -1; - int num_other; - - *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *)); - if (!*other) - return -1; - - num_other = 0; - for (;;) { - YYSTYPE val; - int tok = expr__lex(&val, &p); - if (tok == 0) { - err = 0; - break; - } - if (tok == ID && !already_seen(val.id, one, *other, num_other)) { - if (num_other >= EXPR_MAX_OTHER - 1) { - pr_debug("Too many extra events in %s\n", orig); - break; - } - (*other)[num_other] = strdup(val.id); - if (!(*other)[num_other]) - return -1; - num_other++; - } - } - (*other)[num_other] = NULL; - *num_otherp = num_other; - if (err) { - *num_otherp = 0; - free(*other); - *other = NULL; - } - return err; -} diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 93ad27830e2b..acbd046bf95c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1590,6 +1590,40 @@ static void free_event_desc(struct evsel *events) free(events); } +static bool perf_attr_check(struct perf_event_attr *attr) +{ + if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) { + pr_warning("Reserved bits are set unexpectedly. " + "Please update perf tool.\n"); + return false; + } + + if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) { + pr_warning("Unknown sample type (0x%llx) is detected. " + "Please update perf tool.\n", + attr->sample_type); + return false; + } + + if (attr->read_format & ~(PERF_FORMAT_MAX-1)) { + pr_warning("Unknown read format (0x%llx) is detected. " + "Please update perf tool.\n", + attr->read_format); + return false; + } + + if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) && + (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) { + pr_warning("Unknown branch sample type (0x%llx) is detected. " + "Please update perf tool.\n", + attr->branch_sample_type); + + return false; + } + + return true; +} + static struct evsel *read_event_desc(struct feat_fd *ff) { struct evsel *evsel, *events = NULL; @@ -1634,6 +1668,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff) memcpy(&evsel->core.attr, buf, msz); + if (!perf_attr_check(&evsel->core.attr)) + goto error; + if (do_read_u32(ff, &nr)) goto error; @@ -2922,7 +2959,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full) if (ret == -1) return -1; - stctime = st.st_ctime; + stctime = st.st_mtime; fprintf(fp, "# captured on : %s", ctime(&stctime)); fprintf(fp, "# header version : %u\n", header->version); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index ca5a8f4d007e..e74a5acf66d9 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2584,9 +2584,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, u64 *total_cycles) { struct branch_info *bi; + struct branch_entry *entries = perf_sample__branch_entries(sample); /* If we have branch cycles always annotate them. */ - if (bs && bs->nr && bs->entries[0].flags.cycles) { + if (bs && bs->nr && entries[0].flags.cycles) { int i; bi = sample__resolve_bstack(sample, al); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 33cf8928cf05..23c8289c2472 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1295,6 +1295,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) struct perf_sample sample = { .ip = 0, }; struct dummy_branch_stack { u64 nr; + u64 hw_idx; struct branch_entry entries; } dummy_bs; @@ -1316,6 +1317,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) { dummy_bs = (struct dummy_branch_stack){ .nr = 1, + .hw_idx = -1ULL, .entries = { .from = sample.ip, .to = sample.addr, diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index eae47c2509eb..dbdffb6673fe 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir) return -ENOMEM; return 0; } + pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n", + __func__, autoconf_path); free(autoconf_path); return -ENOENT; } @@ -288,6 +290,7 @@ static const char *kinc_fetch_script = "obj-y := dummy.o\n" "\\$(obj)/%.o: \\$(src)/%.c\n" "\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n" +"\t\\$(CC) -c -o \\$@ \\$<\n" "EOF\n" "touch $TMPDIR/dummy.c\n" "make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n" diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index c8c5410315e8..fd14f1489802 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -686,6 +686,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__set_module_info(dso, m, machine); dso__set_long_name(dso, strdup(filename), true); + dso->kernel = DSO_TYPE_KERNEL; } dso__get(dso); @@ -726,9 +727,17 @@ static int machine__process_ksymbol_register(struct machine *machine, struct map *map = maps__find(&machine->kmaps, event->ksymbol.addr); if (!map) { - map = dso__new_map(event->ksymbol.name); - if (!map) + struct dso *dso = dso__new(event->ksymbol.name); + + if (dso) { + dso->kernel = DSO_TYPE_KERNEL; + map = map__new2(0, dso); + } + + if (!dso || !map) { + dso__put(dso); return -ENOMEM; + } map->start = event->ksymbol.addr; map->end = map->start + event->ksymbol.len; @@ -972,7 +981,6 @@ int machine__create_extra_kernel_map(struct machine *machine, kmap = map__kmap(map); - kmap->kmaps = &machine->kmaps; strlcpy(kmap->name, xm->name, KMAP_NAME_LEN); maps__insert(&machine->kmaps, map); @@ -1082,9 +1090,6 @@ int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unu static int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { - struct kmap *kmap; - struct map *map; - /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); @@ -1093,14 +1098,7 @@ __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) return -1; machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip; - map = machine__kernel_map(machine); - kmap = map__kmap(map); - if (!kmap) - return -1; - - kmap->kmaps = &machine->kmaps; - maps__insert(&machine->kmaps, map); - + maps__insert(&machine->kmaps, machine->vmlinux_map); return 0; } @@ -2083,15 +2081,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, { unsigned int i; const struct branch_stack *bs = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info)); if (!bi) return NULL; for (i = 0; i < bs->nr; i++) { - ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to); - ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from); - bi[i].flags = bs->entries[i].flags; + ip__resolve_ams(al->thread, &bi[i].to, entries[i].to); + ip__resolve_ams(al->thread, &bi[i].from, entries[i].from); + bi[i].flags = entries[i].flags; } return bi; } @@ -2187,6 +2186,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, /* LBR only affects the user callchain */ if (i != chain_nr) { struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); int lbr_nr = lbr_stack->nr, j, k; bool branch; struct branch_flags *flags; @@ -2212,31 +2212,29 @@ static int resolve_lbr_callchain_sample(struct thread *thread, ip = chain->ips[j]; else if (j > i + 1) { k = j - i - 2; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } else { if (j < lbr_nr) { k = lbr_nr - j - 1; - ip = lbr_stack->entries[k].from; + ip = entries[k].from; branch = true; - flags = &lbr_stack->entries[k].flags; + flags = &entries[k].flags; } else if (j > lbr_nr) ip = chain->ips[i + 1 - (j - lbr_nr)]; else { - ip = lbr_stack->entries[0].to; + ip = entries[0].to; branch = true; - flags = &lbr_stack->entries[0].flags; - branch_from = - lbr_stack->entries[0].from; + flags = &entries[0].flags; + branch_from = entries[0].from; } } @@ -2283,6 +2281,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, int max_stack) { struct branch_stack *branch = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); struct ip_callchain *chain = sample->callchain; int chain_nr = 0; u8 cpumode = PERF_RECORD_MISC_USER; @@ -2330,7 +2329,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { if (callchain_param.order == ORDER_CALLEE) { - be[i] = branch->entries[i]; + be[i] = entries[i]; if (chain == NULL) continue; @@ -2349,7 +2348,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i].from >= chain->ips[first_call] - 8) first_call++; } else - be[i] = branch->entries[branch->nr - i - 1]; + be[i] = entries[branch->nr - i - 1]; } memset(iter, 0, sizeof(struct iterations) * nr); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index fdd5bddb3075..53d96611e6a6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -44,8 +44,8 @@ static inline int is_no_dso_memory(const char *filename) static inline int is_android_lib(const char *filename) { - return !strncmp(filename, "/data/app-lib", 13) || - !strncmp(filename, "/system/lib", 11); + return strstarts(filename, "/data/app-lib/") || + strstarts(filename, "/system/lib/"); } static inline bool replace_android_lib(const char *filename, char *newfilename) @@ -65,7 +65,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) app_abi_length = strlen(app_abi); - if (!strncmp(filename, "/data/app-lib", 13)) { + if (strstarts(filename, "/data/app-lib/")) { char *apk_path; if (!app_abi_length) @@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return true; } - if (!strncmp(filename, "/system/lib/", 11)) { + if (strstarts(filename, "/system/lib/")) { char *ndk, *app; const char *arch; size_t ndk_length; @@ -375,8 +375,13 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name) struct map *map__clone(struct map *from) { - struct map *map = memdup(from, sizeof(*map)); + size_t size = sizeof(struct map); + struct map *map; + + if (from->dso && from->dso->kernel) + size += sizeof(struct kmap); + map = memdup(from, size); if (map != NULL) { refcount_set(&map->refcnt, 1); RB_CLEAR_NODE(&map->rb_node); @@ -426,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { char *srcline = map__srcline(map, addr, NULL); - if (srcline != SRCLINE_UNKNOWN) + if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); } @@ -538,6 +543,16 @@ void maps__insert(struct maps *maps, struct map *map) __maps__insert(maps, map); ++maps->nr_maps; + if (map->dso && map->dso->kernel) { + struct kmap *kmap = map__kmap(map); + + if (kmap) + kmap->kmaps = maps; + else + pr_err("Internal error: kernel dso with non kernel map\n"); + } + + /* * If we already performed some search by name, then we need to add the just * inserted map and resort. @@ -549,6 +564,7 @@ void maps__insert(struct maps *maps, struct map *map) if (maps_by_name == NULL) { __maps__free_maps_by_name(maps); + up_write(&maps->lock); return; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 02aee946b6c1..c3a8c701609a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -22,6 +22,8 @@ #include <linux/string.h> #include <linux/zalloc.h> #include <subcmd/parse-options.h> +#include <api/fs/fs.h> +#include "util.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -399,13 +401,85 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, strlist__delete(metriclist); } +static void metricgroup__add_metric_weak_group(struct strbuf *events, + const char **ids, + int idnum) +{ + bool no_group = false; + int i; + + for (i = 0; i < idnum; i++) { + pr_debug("found event %s\n", ids[i]); + /* + * Duration time maps to a software event and can make + * groups not count. Always use it outside a + * group. + */ + if (!strcmp(ids[i], "duration_time")) { + if (i > 0) + strbuf_addf(events, "}:W,"); + strbuf_addf(events, "duration_time"); + no_group = true; + continue; + } + strbuf_addf(events, "%s%s", + i == 0 || no_group ? "{" : ",", + ids[i]); + no_group = false; + } + if (!no_group) + strbuf_addf(events, "}:W"); +} + +static void metricgroup__add_metric_non_group(struct strbuf *events, + const char **ids, + int idnum) +{ + int i; + + for (i = 0; i < idnum; i++) + strbuf_addf(events, ",%s", ids[i]); +} + +static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +{ + static bool violate_nmi_constraint; + + if (!foot) { + pr_warning("Splitting metric group %s into standalone metrics.\n", name); + violate_nmi_constraint = true; + return; + } + + if (!violate_nmi_constraint) + return; + + pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" + " perf stat ...\n" + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); +} + +static bool metricgroup__has_constraint(struct pmu_event *pe) +{ + if (!pe->metric_constraint) + return false; + + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + sysctl__nmi_watchdog_enabled()) { + metricgroup___watchdog_constraint_hint(pe->metric_name, false); + return true; + } + + return false; +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; - int ret = -EINVAL; - int i, j; + int i, ret = -EINVAL; if (!map) return 0; @@ -422,7 +496,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, const char **ids; int idnum; struct egroup *eg; - bool no_group = false; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); @@ -431,27 +504,11 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, continue; if (events->len > 0) strbuf_addf(events, ","); - for (j = 0; j < idnum; j++) { - pr_debug("found event %s\n", ids[j]); - /* - * Duration time maps to a software event and can make - * groups not count. Always use it outside a - * group. - */ - if (!strcmp(ids[j], "duration_time")) { - if (j > 0) - strbuf_addf(events, "}:W,"); - strbuf_addf(events, "duration_time"); - no_group = true; - continue; - } - strbuf_addf(events, "%s%s", - j == 0 || no_group ? "{" : ",", - ids[j]); - no_group = false; - } - if (!no_group) - strbuf_addf(events, "}:W"); + + if (metricgroup__has_constraint(pe)) + metricgroup__add_metric_non_group(events, ids, idnum); + else + metricgroup__add_metric_weak_group(events, ids, idnum); eg = malloc(sizeof(struct egroup)); if (!eg) { @@ -493,6 +550,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events, } } free(nlist); + + if (!ret) + metricgroup___watchdog_constraint_hint(NULL, true); + return ret; } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 063d1b93c53d..ab7108d22428 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -23,6 +23,18 @@ #include "mmap.h" #include "../perf.h" #include <internal/lib.h> /* page_size */ +#include <linux/bitmap.h> + +#define MASK_SIZE 1023 +void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag) +{ + char buf[MASK_SIZE + 1]; + size_t len; + + len = bitmap_scnprintf(mask->bits, mask->nbits, buf, MASK_SIZE); + buf[len] = '\0'; + pr_debug("%p: %s mask[%zd]: %s\n", mask, tag, mask->nbits, buf); +} size_t mmap__mmap_len(struct mmap *map) { @@ -86,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity) { void *data; size_t mmap_len; - unsigned long node_mask; + unsigned long *node_mask; + unsigned long node_index; + int err = 0; if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { data = map->aio.data[idx]; mmap_len = mmap__mmap_len(map); - node_mask = 1UL << cpu__get_node(cpu); - if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { - pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", - data, data + mmap_len, cpu__get_node(cpu)); + node_index = cpu__get_node(cpu); + node_mask = bitmap_alloc(node_index + 1); + if (!node_mask) { + pr_err("Failed to allocate node mask for mbind: error %m\n"); return -1; } + set_bit(node_index, node_mask); + if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) { + pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n", + data, data + mmap_len, node_index); + err = -1; + } + bitmap_free(node_mask); } - return 0; + return err; } #else /* !HAVE_LIBNUMA_SUPPORT */ static int perf_mmap__aio_alloc(struct mmap *map, int idx) @@ -207,6 +228,8 @@ static void perf_mmap__aio_munmap(struct mmap *map __maybe_unused) void mmap__munmap(struct mmap *map) { + bitmap_free(map->affinity_mask.bits); + perf_mmap__aio_munmap(map); if (map->data != NULL) { munmap(map->data, mmap__mmap_len(map)); @@ -215,7 +238,7 @@ void mmap__munmap(struct mmap *map) auxtrace_mmap__munmap(&map->auxtrace_mmap); } -static void build_node_mask(int node, cpu_set_t *mask) +static void build_node_mask(int node, struct mmap_cpu_mask *mask) { int c, cpu, nr_cpus; const struct perf_cpu_map *cpu_map = NULL; @@ -228,17 +251,23 @@ static void build_node_mask(int node, cpu_set_t *mask) for (c = 0; c < nr_cpus; c++) { cpu = cpu_map->map[c]; /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) - CPU_SET(cpu, mask); + set_bit(cpu, mask->bits); } } -static void perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) +static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *mp) { - CPU_ZERO(&map->affinity_mask); + map->affinity_mask.nbits = cpu__max_cpu(); + map->affinity_mask.bits = bitmap_alloc(map->affinity_mask.nbits); + if (!map->affinity_mask.bits) + return -1; + if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask); else if (mp->affinity == PERF_AFFINITY_CPU) - CPU_SET(map->core.cpu, &map->affinity_mask); + set_bit(map->core.cpu, map->affinity_mask.bits); + + return 0; } int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) @@ -249,7 +278,15 @@ int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu) return -1; } - perf_mmap__setup_affinity_mask(map, mp); + if (mp->affinity != PERF_AFFINITY_SYS && + perf_mmap__setup_affinity_mask(map, mp)) { + pr_debug2("failed to alloc mmap affinity mask, error %d\n", + errno); + return -1; + } + + if (verbose == 2) + mmap_cpu_mask__scnprintf(&map->affinity_mask, "mmap"); map->core.flush = mp->flush; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index bee4e83f7109..9d5f589f02ae 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -15,6 +15,15 @@ #include "event.h" struct aiocb; + +struct mmap_cpu_mask { + unsigned long *bits; + size_t nbits; +}; + +#define MMAP_CPU_MASK_BYTES(m) \ + (BITS_TO_LONGS(((struct mmap_cpu_mask *)m)->nbits) * sizeof(unsigned long)) + /** * struct mmap - perf's ring buffer mmap details * @@ -31,7 +40,7 @@ struct mmap { int nr_cblocks; } aio; #endif - cpu_set_t affinity_mask; + struct mmap_cpu_mask affinity_mask; void *data; int comp_level; }; @@ -52,4 +61,6 @@ int perf_mmap__push(struct mmap *md, void *to, size_t mmap__mmap_len(struct mmap *map); +void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag); + #endif /*__PERF_MMAP_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ed7c008b9c8b..a7dc0b096974 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -257,21 +257,15 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) path = zalloc(sizeof(*path)); if (!path) return NULL; - path->system = malloc(MAX_EVENT_LENGTH); - if (!path->system) { + if (asprintf(&path->system, "%.*s", MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) { free(path); return NULL; } - path->name = malloc(MAX_EVENT_LENGTH); - if (!path->name) { + if (asprintf(&path->name, "%.*s", MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) { zfree(&path->system); free(path); return NULL; } - strncpy(path->system, sys_dirent->d_name, - MAX_EVENT_LENGTH); - strncpy(path->name, evt_dirent->d_name, - MAX_EVENT_LENGTH); return path; } } @@ -1219,8 +1213,7 @@ static int config_attr(struct perf_event_attr *attr, static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused) { -#define ADD_CONFIG_TERM(__type, __name, __val) \ -do { \ +#define ADD_CONFIG_TERM(__type, __weak) \ struct perf_evsel_config_term *__t; \ \ __t = zalloc(sizeof(*__t)); \ @@ -1229,9 +1222,24 @@ do { \ \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ + __t->weak = __weak; \ + list_add_tail(&__t->list, head_terms) + +#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak) \ +do { \ + ADD_CONFIG_TERM(__type, __weak); \ __t->val.__name = __val; \ - __t->weak = term->weak; \ - list_add_tail(&__t->list, head_terms); \ +} while (0) + +#define ADD_CONFIG_TERM_STR(__type, __val, __weak) \ +do { \ + ADD_CONFIG_TERM(__type, __weak); \ + __t->val.str = strdup(__val); \ + if (!__t->val.str) { \ + zfree(&__t); \ + return -ENOMEM; \ + } \ + __t->free_str = true; \ } while (0) struct parse_events_term *term; @@ -1239,53 +1247,62 @@ do { \ list_for_each_entry(term, head_config, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: - ADD_CONFIG_TERM(PERIOD, period, term->val.num); + ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: - ADD_CONFIG_TERM(FREQ, freq, term->val.num); + ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_TIME: - ADD_CONFIG_TERM(TIME, time, term->val.num); + ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: - ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); + ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: - ADD_CONFIG_TERM(BRANCH, branch, term->val.str); + ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_STACKSIZE: - ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); + ADD_CONFIG_TERM_VAL(STACK_USER, stack_user, + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_INHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(INHERIT, inherit, + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_NOINHERIT: - ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1); + ADD_CONFIG_TERM_VAL(INHERIT, inherit, + term->val.num ? 0 : 1, term->weak); break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: - ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); + ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack, + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: - ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num); + ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events, + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: - ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: - ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1); + ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, + term->val.num ? 0 : 1, term->weak); break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: - ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str); + ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_PERCORE: - ADD_CONFIG_TERM(PERCORE, percore, - term->val.num ? true : false); + ADD_CONFIG_TERM_VAL(PERCORE, percore, + term->val.num ? true : false, term->weak); break; case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: - ADD_CONFIG_TERM(AUX_OUTPUT, aux_output, term->val.num ? 1 : 0); + ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output, + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: - ADD_CONFIG_TERM(AUX_SAMPLE_SIZE, aux_sample_size, term->val.num); + ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, + term->val.num, term->weak); break; default: break; @@ -1322,7 +1339,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, } if (bits) - ADD_CONFIG_TERM(CFG_CHG, cfg_chg, bits); + ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false); #undef ADD_CONFIG_TERM return 0; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index e2eea4e601b4..94f8bcd83582 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -1,4 +1,4 @@ -%pure-parser +%define api.pure full %parse-param {void *_parse_state} %parse-param {void *scanner} %lex-param {void* scanner} diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 651203126c71..355d3458d4e6 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -50,6 +50,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), + bit_name(HW_INDEX), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 5003ba403345..8c852948513e 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -206,6 +206,9 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group) } else ret = strlist__add(sl, tev.event); clear_probe_trace_event(&tev); + /* Skip if there is same name multi-probe event in the list */ + if (ret == -EEXIST) + ret = 0; if (ret < 0) break; } @@ -301,10 +304,15 @@ int probe_file__get_events(int fd, struct strfilter *filter, p = strchr(ent->s, ':'); if ((p && strfilter__compare(filter, p + 1)) || strfilter__compare(filter, ent->s)) { - strlist__add(plist, ent->s); + ret = strlist__add(plist, ent->s); + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + goto out; + } ret = 0; } } +out: strlist__delete(namelist); return ret; @@ -511,7 +519,11 @@ static int probe_cache__load(struct probe_cache *pcache) ret = -EINVAL; goto out; } - strlist__add(entry->tevlist, buf); + ret = strlist__add(entry->tevlist, buf); + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + goto out; + } } } out: @@ -672,7 +684,12 @@ int probe_cache__add_entry(struct probe_cache *pcache, command = synthesize_probe_trace_command(&tevs[i]); if (!command) goto out_err; - strlist__add(entry->tevlist, command); + ret = strlist__add(entry->tevlist, command); + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + goto out_err; + } + free(command); } list_add_tail(&entry->node, &pcache->entries); @@ -853,9 +870,15 @@ int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname) break; } - strlist__add(entry->tevlist, buf); + ret = strlist__add(entry->tevlist, buf); + free(buf); entry = NULL; + + if (ret == -ENOMEM) { + pr_err("strlist__add failed with -ENOMEM\n"); + break; + } } if (entry) { list_del_init(&entry->node); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c470c49a804f..e4cff49384f4 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -303,7 +303,8 @@ static int convert_variable_type(Dwarf_Die *vr_die, char prefix; /* TODO: check all types */ - if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 && + if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "ustring") && + strcmp(cast, "x") != 0 && strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) { /* Non string type is OK */ /* and respect signedness/hexadecimal cast */ @@ -636,14 +637,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, return -EINVAL; } - /* Try to get actual symbol name from symtab */ - symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + if (dwarf_entrypc(sp_die, &eaddr) == 0) { + /* If the DIE has entrypc, use it. */ + symbol = dwarf_diename(sp_die); + } else { + /* Try to get actual symbol name and address from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + eaddr = sym.st_value; + } if (!symbol) { pr_warning("Failed to find symbol at 0x%lx\n", (unsigned long)paddr); return -ENOENT; } - eaddr = sym.st_value; tp->offset = (unsigned long)(paddr - eaddr); tp->address = (unsigned long)paddr; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 80ca5d0ab7fe..8c1b27cd8b99 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; @@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); pydict_set_item_string_decref(pyelem, "from", - PyLong_FromUnsignedLongLong(br->entries[i].from)); + PyLong_FromUnsignedLongLong(entries[i].from)); pydict_set_item_string_decref(pyelem, "to", - PyLong_FromUnsignedLongLong(br->entries[i].to)); + PyLong_FromUnsignedLongLong(entries[i].to)); pydict_set_item_string_decref(pyelem, "mispred", - PyBool_FromLong(br->entries[i].flags.mispred)); + PyBool_FromLong(entries[i].flags.mispred)); pydict_set_item_string_decref(pyelem, "predicted", - PyBool_FromLong(br->entries[i].flags.predicted)); + PyBool_FromLong(entries[i].flags.predicted)); pydict_set_item_string_decref(pyelem, "in_tx", - PyBool_FromLong(br->entries[i].flags.in_tx)); + PyBool_FromLong(entries[i].flags.in_tx)); pydict_set_item_string_decref(pyelem, "abort", - PyBool_FromLong(br->entries[i].flags.abort)); + PyBool_FromLong(entries[i].flags.abort)); pydict_set_item_string_decref(pyelem, "cycles", - PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); + PyLong_FromUnsignedLongLong(entries[i].flags.cycles)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "from_dsoname", _PyUnicode_FromString(dsoname)); thread__find_map_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); dsoname = get_dsoname(al.map); pydict_set_item_string_decref(pyelem, "to_dsoname", _PyUnicode_FromString(dsoname)); @@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, struct thread *thread) { struct branch_stack *br = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); PyObject *pylist; u64 i; char bf[512]; @@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, Py_FatalError("couldn't create Python dictionary"); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].from, &al); + entries[i].from, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "from", _PyUnicode_FromString(bf)); thread__find_symbol_fb(thread, sample->cpumode, - br->entries[i].to, &al); + entries[i].to, &al); get_symoff(al.sym, &al, true, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "to", _PyUnicode_FromString(bf)); - get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); + get_br_mspred(&entries[i].flags, bf, sizeof(bf)); pydict_set_item_string_decref(pyelem, "pred", _PyUnicode_FromString(bf)); - if (br->entries[i].flags.in_tx) { + if (entries[i].flags.in_tx) { pydict_set_item_string_decref(pyelem, "in_tx", _PyUnicode_FromString("X")); } else { @@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample, _PyUnicode_FromString("-")); } - if (br->entries[i].flags.abort) { + if (entries[i].flags.abort) { pydict_set_item_string_decref(pyelem, "abort", _PyUnicode_FromString("A")); } else { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d0d7d25b23e3..055b00abd56d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1007,6 +1007,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) { struct ip_callchain *callchain = sample->callchain; struct branch_stack *lbr_stack = sample->branch_stack; + struct branch_entry *entries = perf_sample__branch_entries(sample); u64 kernel_callchain_nr = callchain->nr; unsigned int i; @@ -1043,10 +1044,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) i, callchain->ips[i]); printf("..... %2d: %016" PRIx64 "\n", - (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + (int)(kernel_callchain_nr), entries[0].to); for (i = 0; i < lbr_stack->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + (int)(i + kernel_callchain_nr + 1), entries[i].from); } } @@ -1068,6 +1069,7 @@ static void callchain__printf(struct evsel *evsel, static void branch_stack__printf(struct perf_sample *sample, bool callstack) { + struct branch_entry *entries = perf_sample__branch_entries(sample); uint64_t i; printf("%s: nr:%" PRIu64 "\n", @@ -1075,7 +1077,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack) sample->branch_stack->nr); for (i = 0; i < sample->branch_stack->nr; i++) { - struct branch_entry *e = &sample->branch_stack->entries[i]; + struct branch_entry *e = &entries[i]; if (!callstack) { printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index aa344a163eaf..8a065a6f9713 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -2,11 +2,13 @@ from os import getenv from subprocess import Popen, PIPE from re import sub +cc = getenv("CC") +cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() + def clang_has_option(option): - return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] -cc = getenv("CC") -if cc == "clang": +if cc_is_clang: from distutils.sysconfig import get_config_vars vars = get_config_vars() for var in ('CFLAGS', 'OPT'): @@ -40,7 +42,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] -if cc != "clang": +if not cc_is_clang: cflags += ['-Wno-cast-function-type' ] src_perf = getenv('srctree') + '/tools/perf' diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9fcba2872130..ab0cfd790ad0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -324,8 +324,7 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms, return ret; } -static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, - size_t size, unsigned int width) +int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { return _hist_entry__sym_snprintf(&he->ms, he->ip, he->level, bf, size, width); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 5aff9542d9b7..6c862d62d052 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -164,6 +164,8 @@ static __pure inline bool hist_entry__has_callchains(struct hist_entry *he) return he->callchain_size != 0; } +int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width); + static inline bool hist_entry__has_pairs(struct hist_entry *he) { return !list_empty(&he->pairs.node); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 6ccf6f6d09df..5b7d6c16d33f 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -193,16 +193,30 @@ static void find_address_in_section(bfd *abfd, asection *section, void *data) bfd_vma pc, vma; bfd_size_type size; struct a2l_data *a2l = data; + flagword flags; if (a2l->found) return; - if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0) +#ifdef bfd_get_section_flags + flags = bfd_get_section_flags(abfd, section); +#else + flags = bfd_section_flags(section); +#endif + if ((flags & SEC_ALLOC) == 0) return; pc = a2l->addr; +#ifdef bfd_get_section_vma vma = bfd_get_section_vma(abfd, section); +#else + vma = bfd_section_vma(section); +#endif +#ifdef bfd_get_section_size size = bfd_get_section_size(section); +#else + size = bfd_section_size(section); +#endif if (pc < vma || pc >= vma + size) return; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index bc31fccc0057..76c6052b12e2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -16,6 +16,7 @@ #include <linux/ctype.h> #include "cgroup.h" #include <api/fs/fs.h> +#include "util.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" @@ -110,7 +111,7 @@ static void aggr_printout(struct perf_stat_config *config, config->csv_sep); break; case AGGR_NONE: - if (evsel->percore) { + if (evsel->percore && !config->percore_show_thread) { fprintf(config->output, "S%d-D%d-C%*d%s", cpu_map__id_to_socket(id), cpu_map__id_to_die(id), @@ -628,7 +629,7 @@ static void aggr_cb(struct perf_stat_config *config, static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, char *prefix, bool metric_only, - bool *first) + bool *first, int cpu) { struct aggr_data ad; FILE *output = config->output; @@ -654,7 +655,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config, fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(config, id, nr, counter, uval, prefix, + printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat); if (!metric_only) fputc('\n', output); @@ -687,7 +688,7 @@ static void print_aggr(struct perf_stat_config *config, evlist__for_each_entry(evlist, counter) { print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first); + &first, -1); } if (metric_only) fputc('\n', output); @@ -1097,7 +1098,6 @@ static void print_footer(struct perf_stat_config *config) { double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = config->output; - int n; if (!config->null_run) fprintf(output, "\n"); @@ -1131,9 +1131,7 @@ static void print_footer(struct perf_stat_config *config) } fprintf(output, "\n\n"); - if (config->print_free_counters_hint && - sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && - n > 0) + if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled()) fprintf(output, "Some events weren't counted. Try disabling the NMI watchdog:\n" " echo 0 > /proc/sys/kernel/nmi_watchdog\n" @@ -1146,6 +1144,26 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. Try reorganizing the group.\n"); } +static void print_percore_thread(struct perf_stat_config *config, + struct evsel *counter, char *prefix) +{ + int s, s2, id; + bool first = true; + + for (int i = 0; i < perf_evsel__nr_cpus(counter); i++) { + s2 = config->aggr_get_id(config, evsel__cpus(counter), i); + for (s = 0; s < config->aggr_map->nr; s++) { + id = config->aggr_map->map[s]; + if (s2 == id) + break; + } + + print_counter_aggrdata(config, counter, s, + prefix, false, + &first, i); + } +} + static void print_percore(struct perf_stat_config *config, struct evsel *counter, char *prefix) { @@ -1157,13 +1175,16 @@ static void print_percore(struct perf_stat_config *config, if (!(config->aggr_map || config->aggr_get_id)) return; + if (config->percore_show_thread) + return print_percore_thread(config, counter, prefix); + for (s = 0; s < config->aggr_map->nr; s++) { if (prefix && metric_only) fprintf(output, "%s", prefix); print_counter_aggrdata(config, counter, s, prefix, metric_only, - &first); + &first, -1); } if (metric_only) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 2c41d47f6f83..0fd713d3674f 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -18,7 +18,6 @@ * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? */ -static bool have_frontend_stalled; struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; @@ -144,7 +143,6 @@ void runtime_stat__exit(struct runtime_stat *st) void perf_stat__init_shadow_stats(void) { - have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); runtime_stat__init(&rt_stat); } @@ -779,9 +777,7 @@ static void generic_metric(struct perf_stat_config *config, } if (!metric_events[i]) { - const char *p = metric_expr; - - if (expr__parse(&ratio, &pctx, &p) == 0) { + if (expr__parse(&ratio, &pctx, metric_expr) == 0) { char *unit; char metric_bf[64]; @@ -853,10 +849,6 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); - } else if (have_frontend_stalled) { - out->new_line(config, ctxp); - print_metric(config, ctxp, NULL, "%7.2f ", - "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index fb990efa54a8..b4fdfaa7f2c0 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -109,6 +109,7 @@ struct perf_stat_config { bool walltime_run_table; bool all_kernel; bool all_user; + bool percore_show_thread; FILE *output; unsigned int interval; unsigned int timeout; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 3b379b1296f1..26bc6a0096ce 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -635,9 +635,12 @@ out: static bool symbol__is_idle(const char *name) { const char * const idle_symbols[] = { + "acpi_idle_do_entry", + "acpi_processor_ffh_cstate_enter", "arch_cpu_idle", "cpu_idle", "cpu_startup_entry", + "idle_cpu", "intel_idle", "default_idle", "native_safe_halt", @@ -651,13 +654,17 @@ static bool symbol__is_idle(const char *name) NULL }; int i; + static struct strlist *idle_symbols_list; - for (i = 0; idle_symbols[i]; i++) { - if (!strcmp(idle_symbols[i], name)) - return true; - } + if (idle_symbols_list) + return strlist__has_entry(idle_symbols_list, name); - return false; + idle_symbols_list = strlist__new(NULL, NULL); + + for (i = 0; idle_symbols[i]; i++) + strlist__add(idle_symbols_list, idle_symbols[i]); + + return strlist__has_entry(idle_symbols_list, name); } static int map__process_kallsym_symbol(void *arg, const char *name, @@ -1615,7 +1622,12 @@ int dso__load(struct dso *dso, struct map *map) goto out; } - if (dso->kernel) { + kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; + + if (dso->kernel && !kmod) { if (dso->kernel == DSO_TYPE_KERNEL) ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) @@ -1643,12 +1655,6 @@ int dso__load(struct dso *dso, struct map *map) if (!name) goto out; - kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; - - /* * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index c423298fe62d..3f28af39f9c6 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -345,6 +345,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, continue; event->mmap2.ino = (u64)ino; + event->mmap2.ino_generation = 0; /* * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c @@ -1183,7 +1184,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); result += sz; } @@ -1344,7 +1346,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo if (type & PERF_SAMPLE_BRANCH_STACK) { sz = sample->branch_stack->nr * sizeof(struct branch_entry); - sz += sizeof(u64); + /* nr, hw_idx */ + sz += 2 * sizeof(u64); memcpy(array, sample->branch_stack, sz); array = (void *)array + sz; } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 969ae560dad9..d707c9624dd9 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -55,6 +55,24 @@ int sysctl__max_stack(void) return sysctl_perf_event_max_stack; } +bool sysctl__nmi_watchdog_enabled(void) +{ + static bool cached; + static bool nmi_watchdog; + int value; + + if (cached) + return nmi_watchdog; + + if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0) + return false; + + nmi_watchdog = (value > 0) ? true : false; + cached = true; + + return nmi_watchdog; +} + bool test_attr__enabled; bool perf_host = true; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9969b8b46f7c..f486fdd3a538 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -29,6 +29,8 @@ size_t hex_width(u64 v); int sysctl__max_stack(void); +bool sysctl__nmi_watchdog_enabled(void); + int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); #define KVER_VERSION(x) (((x) >> 16) & 0xff) |