summaryrefslogtreecommitdiff
path: root/tools/perf/util/annotate-data.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-03-14 16:31:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-03-14 16:31:23 -0700
commit1bbeaf83dd7b5e3628b98bec66ff8fe2646e14aa (patch)
treea391eed8ae206613b48e02e56e6ad5c4432d8767 /tools/perf/util/annotate-data.c
parent63bd30f249dcf0a7ce16967935cecee8feec24bb (diff)
parent0f66dfe7b91d2743cc71dfff37af503215b204ef (diff)
Merge tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Namhyung Kim: "perf stat: - Support new 'cluster' aggregation mode for shared resources depending on the hardware configuration: $ sudo perf stat -a --per-cluster -e cycles,instructions sleep 1 Performance counter stats for 'system wide': S0-D0-CLS0 2 85,051,822 cycles S0-D0-CLS0 2 73,909,908 instructions # 0.87 insn per cycle S0-D0-CLS2 2 93,365,918 cycles S0-D0-CLS2 2 83,006,158 instructions # 0.89 insn per cycle S0-D0-CLS4 2 104,157,523 cycles S0-D0-CLS4 2 53,234,396 instructions # 0.51 insn per cycle S0-D0-CLS6 2 65,891,079 cycles S0-D0-CLS6 2 41,478,273 instructions # 0.63 insn per cycle 1.002407989 seconds time elapsed - Various fixes and cleanups for event metrics including NaN handling perf script: - Use libcapstone if available to disassemble the instructions. This enables 'perf script -F disasm' and 'perf script --insn-trace=disasm' (for Intel-PT): $ perf script -F event,ip,disasm cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa9839d25 movq %rax, %r14 cycles:P: ffffffffa9cdcaf0 endbr64 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa99c4de5 movq 0x30(%rcx), %r8 cycles:P: ffffffffa988d428 wrmsr cycles:P: ffffffffaa401f86 iretq cycles:P: ffffffffa9907983 movl 0x68(%rbx), %eax cycles:P: ffffffffa988d428 wrmsr - Expose sample ID / stream ID to python scripts perf test: - Add more perf test cases from Redhat internal test suites. This time it adds the base infra and a few perf probe tests. More to come. 
:) - Add 'perf test -p' for parallel execution and fix some issues found by the parallel test - Support symbol test to print symbols in given (active) module: $ perf test -F -v Symbols --dso /lib/modules/$(uname -r)/kernel/fs/ext4/ext4.ko --- start --- Testing /lib/modules/6.5.13-1rodete2-amd64/kernel/fs/ext4/ext4.ko Overlapping symbols: 7a990-7a9a0 l __pfx_ext4_exit_fs 7a990-7a9a0 g __pfx_cleanup_module Overlapping symbols: 7a9a0-7aa1c l ext4_exit_fs 7a9a0-7aa1c g cleanup_module ... JSON metric updates: - A new round of Intel metric updates - Support Power11 PVR (compatible to Power10) - Fix cache latency events on Zen 4 to set SliceId properly Internal: - Fix reference counting for 'map' data structure, tireless work from Ian! - More memory optimization for struct thread and annotate histogram. Now, 'perf report' (TUI) and 'perf annotate' should be much lighter-weight in terms of memory footprint - Support cross-arch perf register access. Clean up the build configuration so that it can detect arch-register support at runtime. This can allow to parse register data in sample which was recorded in a different arch Others: - Sync task state in 'perf sched' to kernel using trace event fields. The task states have been changed so tools cannot assume a fixed encoding - Clean up 'perf mem' to generalize the arch-specific events - Add support for local and global variables to data type profiling. 
This would increase the success rate of type resolution with DWARF - Add short option -H for --hierarchy in 'perf report' and 'perf top'" * tag 'perf-tools-for-v6.9-2024-03-13' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (154 commits) perf annotate: Add comments in the data structures perf annotate: Remove sym_hist.addr[] array perf annotate: Calculate instruction overhead using hashmap perf annotate: Add a hashmap for symbol histogram perf threads: Reduce table size from 256 to 8 perf threads: Switch from rbtree to hashmap perf threads: Move threads to its own files perf machine: Move machine's threads into its own abstraction perf machine: Move fprintf to for_each loop and a callback perf trace: Ignore thread hashing in summary perf report: Sort child tasks by tid perf vendor events amd: Fix Zen 4 cache latency events perf version: Display availability of OpenCSD support perf vendor events intel: Add umasks/occ_sel to PCU events. perf map: Fix map reference count issues libperf evlist: Avoid out-of-bounds access perf lock contention: Account contending locks too perf metrics: Fix segv for metrics with no events perf metrics: Fix metric matching perf pmu: Fix a potential memory leak in perf_pmu__lookup() ...
Diffstat (limited to 'tools/perf/util/annotate-data.c')
-rw-r--r--tools/perf/util/annotate-data.c119
1 files changed, 100 insertions, 19 deletions
diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c
index f22b4f18271c..30c4d19fcf11 100644
--- a/tools/perf/util/annotate-data.c
+++ b/tools/perf/util/annotate-data.c
@@ -9,10 +9,12 @@
#include <stdlib.h>
#include <inttypes.h>
+#include "annotate.h"
#include "annotate-data.h"
#include "debuginfo.h"
#include "debug.h"
#include "dso.h"
+#include "dwarf-regs.h"
#include "evsel.h"
#include "evlist.h"
#include "map.h"
@@ -192,7 +194,8 @@ static bool find_cu_die(struct debuginfo *di, u64 pc, Dwarf_Die *cu_die)
}
/* The type info will be saved in @type_die */
-static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
+static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset,
+ bool is_pointer)
{
Dwarf_Word size;
@@ -204,14 +207,18 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
}
/*
- * It expects a pointer type for a memory access.
- * Convert to a real type it points to.
+ * Usually it expects a pointer type for a memory access.
+ * Convert to a real type it points to. But global variables
+ * and local variables are accessed directly without a pointer.
*/
- if (dwarf_tag(type_die) != DW_TAG_pointer_type ||
- die_get_real_type(type_die, type_die) == NULL) {
- pr_debug("no pointer or no type\n");
- ann_data_stat.no_typeinfo++;
- return -1;
+ if (is_pointer) {
+ if ((dwarf_tag(type_die) != DW_TAG_pointer_type &&
+ dwarf_tag(type_die) != DW_TAG_array_type) ||
+ die_get_real_type(type_die, type_die) == NULL) {
+ pr_debug("no pointer or no type\n");
+ ann_data_stat.no_typeinfo++;
+ return -1;
+ }
}
/* Get the size of the actual type */
@@ -232,13 +239,18 @@ static int check_variable(Dwarf_Die *var_die, Dwarf_Die *type_die, int offset)
}
/* The result will be saved in @type_die */
-static int find_data_type_die(struct debuginfo *di, u64 pc,
- int reg, int offset, Dwarf_Die *type_die)
+static int find_data_type_die(struct debuginfo *di, u64 pc, u64 addr,
+ const char *var_name, struct annotated_op_loc *loc,
+ Dwarf_Die *type_die)
{
Dwarf_Die cu_die, var_die;
Dwarf_Die *scopes = NULL;
+ int reg, offset;
int ret = -1;
int i, nr_scopes;
+ int fbreg = -1;
+ bool is_fbreg = false;
+ int fb_offset = 0;
/* Get a compile_unit for this address */
if (!find_cu_die(di, pc, &cu_die)) {
@@ -247,19 +259,81 @@ static int find_data_type_die(struct debuginfo *di, u64 pc,
return -1;
}
+ reg = loc->reg1;
+ offset = loc->offset;
+
+ if (reg == DWARF_REG_PC) {
+ if (die_find_variable_by_addr(&cu_die, pc, addr, &var_die, &offset)) {
+ ret = check_variable(&var_die, type_die, offset,
+ /*is_pointer=*/false);
+ loc->offset = offset;
+ goto out;
+ }
+
+ if (var_name && die_find_variable_at(&cu_die, var_name, pc,
+ &var_die)) {
+ ret = check_variable(&var_die, type_die, 0,
+ /*is_pointer=*/false);
+ /* loc->offset will be updated by the caller */
+ goto out;
+ }
+ }
+
/* Get a list of nested scopes - i.e. (inlined) functions and blocks. */
nr_scopes = die_get_scopes(&cu_die, pc, &scopes);
+ if (reg != DWARF_REG_PC && dwarf_hasattr(&scopes[0], DW_AT_frame_base)) {
+ Dwarf_Attribute attr;
+ Dwarf_Block block;
+
+ /* Check if the 'reg' is assigned as frame base register */
+ if (dwarf_attr(&scopes[0], DW_AT_frame_base, &attr) != NULL &&
+ dwarf_formblock(&attr, &block) == 0 && block.length == 1) {
+ switch (*block.data) {
+ case DW_OP_reg0 ... DW_OP_reg31:
+ fbreg = *block.data - DW_OP_reg0;
+ break;
+ case DW_OP_call_frame_cfa:
+ if (die_get_cfa(di->dbg, pc, &fbreg,
+ &fb_offset) < 0)
+ fbreg = -1;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+retry:
+ is_fbreg = (reg == fbreg);
+ if (is_fbreg)
+ offset = loc->offset - fb_offset;
+
/* Search from the inner-most scope to the outer */
for (i = nr_scopes - 1; i >= 0; i--) {
- /* Look up variables/parameters in this scope */
- if (!die_find_variable_by_reg(&scopes[i], pc, reg, &var_die))
- continue;
+ if (reg == DWARF_REG_PC) {
+ if (!die_find_variable_by_addr(&scopes[i], pc, addr,
+ &var_die, &offset))
+ continue;
+ } else {
+ /* Look up variables/parameters in this scope */
+ if (!die_find_variable_by_reg(&scopes[i], pc, reg,
+ &offset, is_fbreg, &var_die))
+ continue;
+ }
/* Found a variable, see if it's correct */
- ret = check_variable(&var_die, type_die, offset);
+ ret = check_variable(&var_die, type_die, offset,
+ reg != DWARF_REG_PC && !is_fbreg);
+ loc->offset = offset;
goto out;
}
+
+ if (loc->multi_regs && reg == loc->reg1 && loc->reg1 != loc->reg2) {
+ reg = loc->reg2;
+ goto retry;
+ }
+
if (ret < 0)
ann_data_stat.no_var++;
@@ -272,15 +346,22 @@ out:
* find_data_type - Return a data type at the location
* @ms: map and symbol at the location
* @ip: instruction address of the memory access
- * @reg: register that holds the base address
- * @offset: offset from the base address
+ * @loc: instruction operand location
+ * @addr: data address of the memory access
+ * @var_name: global variable name
*
* This functions searches the debug information of the binary to get the data
- * type it accesses. The exact location is expressed by (ip, reg, offset).
+ * type it accesses. The exact location is expressed by (@ip, reg, offset)
+ * for pointer variables or (@ip, @addr) for global variables. Note that global
+ * variables might update the @loc->offset after finding the start of the variable.
+ * If it cannot find a global variable by address, it tries to find a declaration
+ * of the variable using @var_name. In that case, @loc->offset won't be updated.
+ *
* It return %NULL if not found.
*/
struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
- int reg, int offset)
+ struct annotated_op_loc *loc, u64 addr,
+ const char *var_name)
{
struct annotated_data_type *result = NULL;
struct dso *dso = map__dso(ms->map);
@@ -300,7 +381,7 @@ struct annotated_data_type *find_data_type(struct map_symbol *ms, u64 ip,
* a file address for DWARF processing.
*/
pc = map__rip_2objdump(ms->map, ip);
- if (find_data_type_die(di, pc, reg, offset, &type_die) < 0)
+ if (find_data_type_die(di, pc, addr, var_name, loc, &type_die) < 0)
goto out;
result = dso__findnew_data_type(dso, &type_die);