diff options
40 files changed, 1584 insertions, 2157 deletions
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h index 765e9e752d03..9a3312e12e2e 100644 --- a/tools/arch/x86/include/asm/amd-ibs.h +++ b/tools/arch/x86/include/asm/amd-ibs.h @@ -29,7 +29,10 @@ union ibs_fetch_ctl { rand_en:1, /* 57: random tagging enable */ fetch_l2_miss:1,/* 58: L2 miss for sampled fetch * (needs IbsFetchComp) */ - reserved:5; /* 59-63: reserved */ + l3_miss_only:1, /* 59: Collect L3 miss samples only */ + fetch_oc_miss:1,/* 60: Op cache miss for the sampled fetch */ + fetch_l3_miss:1,/* 61: L3 cache miss for the sampled fetch */ + reserved:2; /* 62-63: reserved */ }; }; @@ -38,14 +41,14 @@ union ibs_op_ctl { __u64 val; struct { __u64 opmaxcnt:16, /* 0-15: periodic op max. count */ - reserved0:1, /* 16: reserved */ + l3_miss_only:1, /* 16: Collect L3 miss samples only */ op_en:1, /* 17: op sampling enable */ op_val:1, /* 18: op sample valid */ cnt_ctl:1, /* 19: periodic op counter control */ opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ - reserved1:5, /* 27-31: reserved */ + reserved0:5, /* 27-31: reserved */ opcurcnt:27, /* 32-58: periodic op counter current count */ - reserved2:5; /* 59-63: reserved */ + reserved1:5; /* 59-63: reserved */ }; }; @@ -71,11 +74,12 @@ union ibs_op_data { union ibs_op_data2 { __u64 val; struct { - __u64 data_src:3, /* 0-2: data source */ + __u64 data_src_lo:3, /* 0-2: data source low */ reserved0:1, /* 3: reserved */ rmt_node:1, /* 4: destination node */ cache_hit_st:1, /* 5: cache hit state */ - reserved1:57; /* 5-63: reserved */ + data_src_hi:2, /* 6-7: data source high */ + reserved1:56; /* 8-63: reserved */ }; }; diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index e7758707cadd..9f7ca070da87 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -389,6 +389,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_TIME_CONV = 79, 
PERF_RECORD_HEADER_FEATURE = 80, PERF_RECORD_COMPRESSED = 81, + PERF_RECORD_FINISHED_INIT = 82, PERF_RECORD_HEADER_MAX }; diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 656b537b2fba..4b8568f0c53b 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -46,6 +46,13 @@ COMMON OPTIONS --force:: Don't complain, do it. +--vmlinux=<file>:: + vmlinux pathname + +--kallsyms=<file>:: + kallsyms pathname + + REPORT OPTIONS -------------- diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index cf8ad50f3de1..099817ef5150 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -275,6 +275,11 @@ OPTIONS User can change the size by passing the size after comma like "--call-graph dwarf,4096". + When "fp" recording is used, perf tries to save stack entries + up to the number specified in sysctl.kernel.perf_event_max_stack + by default. User can change the number by passing it after comma + like "--call-graph fp,32". + -q:: --quiet:: Don't print any message, useful for scripting. @@ -313,6 +318,11 @@ OPTIONS --sample-cpu:: Record the sample cpu. +--sample-identifier:: + Record the sample identifier i.e. PERF_SAMPLE_IDENTIFIER bit set in + the sample_type member of the struct perf_event_attr argument to the + perf_event_open system call. + -n:: --no-samples:: Don't sample. diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index f56d0e0fbff6..635ba043fd7d 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -419,18 +419,20 @@ Example: cpu_core cpu list : 0-15 cpu_atom cpu list : 16-23 - HEADER_HYBRID_CPU_PMU_CAPS = 31, + HEADER_PMU_CAPS = 31, - A list of hybrid CPU PMU capabilities. 
+ List of pmu capabilities (except cpu pmu which is already + covered by HEADER_CPU_PMU_CAPS). Note that hybrid cpu pmu + capabilities are also stored here. struct { u32 nr_pmu; struct { - u32 nr_cpu_pmu_caps; + u32 nr_caps; { char name[]; char value[]; - } [nr_cpu_pmu_caps]; + } [nr_caps]; char pmu_name[]; } [nr_pmu]; }; @@ -607,6 +609,16 @@ struct compressed_event { char data[]; }; + PERF_RECORD_FINISHED_INIT = 82, + +Marks the end of records for the system, pre-existing threads in system wide +sessions, etc. Those are the ones prefixed PERF_RECORD_USER_*. + +This is used, for instance, to 'perf inject' events after init and before +regular events, those emitted by the kernel, to support combining guest and +host records. + + The header is followed by compressed data frame that can be decompressed into array of perf trace records. The size of the entire compressed event record including the header is limited by the max value of header.size. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 73e0762092fe..153c18909ff5 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -241,15 +241,15 @@ endif # Try different combinations to accommodate systems that only have # python[2][3]-config in weird combinations in the following order of # priority from lowest to highest: -# * python3-config -# * python-config # * python2-config as per pep-0394. 
+# * python-config +# * python3-config # * $(PYTHON)-config (If PYTHON is user supplied but PYTHON_CONFIG isn't) # PYTHON_AUTO := python-config -PYTHON_AUTO := $(if $(call get-executable,python3-config),python3-config,$(PYTHON_AUTO)) -PYTHON_AUTO := $(if $(call get-executable,python-config),python-config,$(PYTHON_AUTO)) PYTHON_AUTO := $(if $(call get-executable,python2-config),python2-config,$(PYTHON_AUTO)) +PYTHON_AUTO := $(if $(call get-executable,python-config),python-config,$(PYTHON_AUTO)) +PYTHON_AUTO := $(if $(call get-executable,python3-config),python3-config,$(PYTHON_AUTO)) # If PYTHON is defined but PYTHON_CONFIG isn't, then take $(PYTHON)-config as if it was the user # supplied value for PYTHON_CONFIG. Because it's "user supplied", error out if it doesn't exist. @@ -889,6 +889,25 @@ else endif endif +ifneq ($(NO_JEVENTS),1) + ifeq ($(wildcard pmu-events/arch/$(SRCARCH)/mapfile.csv),) + NO_JEVENTS := 1 + endif +endif +ifneq ($(NO_JEVENTS),1) + NO_JEVENTS := 0 + ifndef PYTHON + $(warning No python interpreter disabling jevent generation) + NO_JEVENTS := 1 + else + # jevents.py uses f-strings present in Python 3.6 released in Dec. 2016. 
+ JEVENTS_PYTHON_GOOD := $(shell $(PYTHON) -c 'import sys;print("1" if(sys.version_info.major > 3 or (sys.version_info.major == 3 and sys.version_info.minor >= 6)) else "0")' 2> /dev/null) + ifneq ($(JEVENTS_PYTHON_GOOD), 1) + $(warning Python interpreter too old (older than 3.6) disabling jevent generation) + NO_JEVENTS := 1 + endif + endif +endif ifndef NO_LIBBFD ifeq ($(feature-libbfd), 1) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 8f738e11356d..8f0b1fb39984 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -651,25 +651,15 @@ strip: $(PROGRAMS) $(OUTPUT)perf PERF_IN := $(OUTPUT)perf-in.o -JEVENTS := $(OUTPUT)pmu-events/jevents -JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o - PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o - -export JEVENTS +export NO_JEVENTS build := -f $(srctree)/tools/build/Makefile.build dir=. obj $(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf -$(JEVENTS_IN): FORCE - $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents - -$(JEVENTS): $(JEVENTS_IN) - $(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@ - -$(PMU_EVENTS_IN): $(JEVENTS) FORCE +$(PMU_EVENTS_IN): FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -1089,7 +1079,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected - $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so + $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)$(LIBJVMTI).so $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 3501399cef35..882c1a8c1ded 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -6,6 +6,10 @@ #include "util/pmu.h" #include "linux/string.h" #include "evsel.h" +#include "util/debug.h" + +#define IBS_FETCH_L3MISSONLY (1ULL << 59) +#define IBS_OP_L3MISSONLY (1ULL << 16) void arch_evsel__set_sample_weight(struct evsel *evsel) { @@ -61,3 +65,51 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel) (strcasestr(evsel->name, "slots") || strcasestr(evsel->name, "topdown")); } + +static void ibs_l3miss_warn(void) +{ + pr_warning( +"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n" +"and tagged operation does not cause L3 Miss. 
This causes sampling period skew.\n"); +} + +void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr) +{ + struct perf_pmu *evsel_pmu, *ibs_fetch_pmu, *ibs_op_pmu; + static int warned_once; + /* 0: Uninitialized, 1: Yes, -1: No */ + static int is_amd; + + if (warned_once || is_amd == -1) + return; + + if (!is_amd) { + struct perf_env *env = evsel__env(evsel); + + if (!perf_env__cpuid(env) || !env->cpuid || + !strstarts(env->cpuid, "AuthenticAMD")) { + is_amd = -1; + return; + } + is_amd = 1; + } + + evsel_pmu = evsel__find_pmu(evsel); + if (!evsel_pmu) + return; + + ibs_fetch_pmu = perf_pmu__find("ibs_fetch"); + ibs_op_pmu = perf_pmu__find("ibs_op"); + + if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_FETCH_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) { + if (attr->config & IBS_OP_L3MISSONLY) { + ibs_l3miss_warn(); + warned_once = 1; + } + } +} diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 54d4e508a092..c800911f68e7 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -809,7 +809,7 @@ static bool keep_feat(int feat) case HEADER_CPU_PMU_CAPS: case HEADER_CLOCK_DATA: case HEADER_HYBRID_TOPOLOGY: - case HEADER_HYBRID_CPU_PMU_CAPS: + case HEADER_PMU_CAPS: return true; /* Information that can be updated */ case HEADER_BUILD_ID: @@ -1061,6 +1061,7 @@ int cmd_inject(int argc, const char **argv) .stat = perf_event__repipe_op2_synth, .stat_round = perf_event__repipe_op2_synth, .feature = perf_event__repipe_op2_synth, + .finished_init = perf_event__repipe_op2_synth, .compressed = perf_event__repipe_op4_synth, .auxtrace = perf_event__repipe_auxtrace, }, diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 23a33ac15e68..c5ca34741561 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -9,6 +9,7 @@ #include "util/symbol.h" #include 
"util/thread.h" #include "util/header.h" +#include "util/callchain.h" #include <subcmd/pager.h> #include <subcmd/parse-options.h> @@ -19,6 +20,7 @@ #include "util/tool.h" #include "util/data.h" #include "util/string2.h" +#include "util/map.h" #include <sys/types.h> #include <sys/prctl.h> @@ -32,6 +34,7 @@ #include <linux/kernel.h> #include <linux/zalloc.h> #include <linux/err.h> +#include <linux/stringify.h> static struct perf_session *session; @@ -120,6 +123,24 @@ static struct rb_root thread_stats; static bool combine_locks; static bool show_thread_stats; +/* + * CONTENTION_STACK_DEPTH + * Number of stack trace entries to find callers + */ +#define CONTENTION_STACK_DEPTH 8 + +/* + * CONTENTION_STACK_SKIP + * Number of stack trace entries to skip when finding callers. + * The first few entries belong to the locking implementation itself. + */ +#define CONTENTION_STACK_SKIP 3 + +static u64 sched_text_start; +static u64 sched_text_end; +static u64 lock_text_start; +static u64 lock_text_end; + static struct thread_stat *thread_stat_find(u32 tid) { struct rb_node *node; @@ -251,6 +272,31 @@ struct lock_key { struct list_head list; }; +static void lock_stat_key_print_time(unsigned long long nsec, int len) +{ + static const struct { + float base; + const char *unit; + } table[] = { + { 1e9 * 3600, "h " }, + { 1e9 * 60, "m " }, + { 1e9, "s " }, + { 1e6, "ms" }, + { 1e3, "us" }, + { 0, NULL }, + }; + + for (int i = 0; table[i].unit; i++) { + if (nsec < table[i].base) + continue; + + pr_info("%*.2f %s", len - 3, nsec / table[i].base, table[i].unit); + return; + } + + pr_info("%*llu %s", len - 3, nsec, "ns"); +} + #define PRINT_KEY(member) \ static void lock_stat_key_print_ ## member(struct lock_key *key, \ struct lock_stat *ls) \ @@ -258,11 +304,18 @@ static void lock_stat_key_print_ ## member(struct lock_key *key, \ pr_info("%*llu", key->len, (unsigned long long)ls->member); \ } +#define PRINT_TIME(member) \ +static void lock_stat_key_print_ ## member(struct lock_key 
*key, \ + struct lock_stat *ls) \ +{ \ + lock_stat_key_print_time((unsigned long long)ls->member, key->len); \ +} + PRINT_KEY(nr_acquired) PRINT_KEY(nr_contended) -PRINT_KEY(avg_wait_time) -PRINT_KEY(wait_time_total) -PRINT_KEY(wait_time_max) +PRINT_TIME(avg_wait_time) +PRINT_TIME(wait_time_total) +PRINT_TIME(wait_time_max) static void lock_stat_key_print_wait_time_min(struct lock_key *key, struct lock_stat *ls) @@ -272,7 +325,7 @@ static void lock_stat_key_print_wait_time_min(struct lock_key *key, if (wait_time == ULLONG_MAX) wait_time = 0; - pr_info("%*"PRIu64, key->len, wait_time); + lock_stat_key_print_time(wait_time, key->len); } @@ -291,10 +344,10 @@ static const char *output_fields; struct lock_key keys[] = { DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10), DEF_KEY_LOCK(contended, "contended", nr_contended, 10), - DEF_KEY_LOCK(avg_wait, "avg wait (ns)", avg_wait_time, 15), - DEF_KEY_LOCK(wait_total, "total wait (ns)", wait_time_total, 15), - DEF_KEY_LOCK(wait_max, "max wait (ns)", wait_time_max, 15), - DEF_KEY_LOCK(wait_min, "min wait (ns)", wait_time_min, 15), + DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12), + DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12), + DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12), + DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12), /* extra comparisons much complicated should be here */ { } @@ -451,6 +504,18 @@ static struct lock_stat *pop_from_result(void) return container_of(node, struct lock_stat, rb); } +static struct lock_stat *lock_stat_find(u64 addr) +{ + struct hlist_head *entry = lockhashentry(addr); + struct lock_stat *ret; + + hlist_for_each_entry(ret, entry, hash_entry) { + if (ret->addr == addr) + return ret; + } + return NULL; +} + static struct lock_stat *lock_stat_findnew(u64 addr, const char *name) { struct hlist_head *entry = lockhashentry(addr); @@ -484,17 +549,29 @@ alloc_failed: } struct trace_lock_handler { + /* it's used on CONFIG_LOCKDEP */ int (*acquire_event)(struct 
evsel *evsel, struct perf_sample *sample); + /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ int (*acquired_event)(struct evsel *evsel, struct perf_sample *sample); + /* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */ int (*contended_event)(struct evsel *evsel, struct perf_sample *sample); + /* it's used on CONFIG_LOCKDEP */ int (*release_event)(struct evsel *evsel, struct perf_sample *sample); + + /* it's used when CONFIG_LOCKDEP is off */ + int (*contention_begin_event)(struct evsel *evsel, + struct perf_sample *sample); + + /* it's used when CONFIG_LOCKDEP is off */ + int (*contention_end_event)(struct evsel *evsel, + struct perf_sample *sample); }; static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr) @@ -783,6 +860,243 @@ end: return 0; } +static bool is_lock_function(u64 addr) +{ + if (!sched_text_start) { + struct machine *machine = &session->machines.host; + struct map *kmap; + struct symbol *sym; + + sym = machine__find_kernel_symbol_by_name(machine, + "__sched_text_start", + &kmap); + if (!sym) { + /* to avoid retry */ + sched_text_start = 1; + return false; + } + + sched_text_start = kmap->unmap_ip(kmap, sym->start); + + /* should not fail from here */ + sym = machine__find_kernel_symbol_by_name(machine, + "__sched_text_end", + &kmap); + sched_text_end = kmap->unmap_ip(kmap, sym->start); + + sym = machine__find_kernel_symbol_by_name(machine, + "__lock_text_start", + &kmap); + lock_text_start = kmap->unmap_ip(kmap, sym->start); + + sym = machine__find_kernel_symbol_by_name(machine, + "__lock_text_end", + &kmap); + lock_text_end = kmap->unmap_ip(kmap, sym->start); + } + + /* failed to get kernel symbols */ + if (sched_text_start == 1) + return false; + + /* mutex and rwsem functions are in sched text section */ + if (sched_text_start <= addr && addr < sched_text_end) + return true; + + /* spinlock functions are in lock text section */ + if (lock_text_start <= addr && addr < lock_text_end) + return true; + + return false; +} + 
+static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample, + char *buf, int size) +{ + struct thread *thread; + struct callchain_cursor *cursor = &callchain_cursor; + struct symbol *sym; + int skip = 0; + int ret; + + /* lock names will be replaced to task name later */ + if (show_thread_stats) + return -1; + + thread = machine__findnew_thread(&session->machines.host, + -1, sample->pid); + if (thread == NULL) + return -1; + + /* use caller function name from the callchain */ + ret = thread__resolve_callchain(thread, cursor, evsel, sample, + NULL, NULL, CONTENTION_STACK_DEPTH); + if (ret != 0) { + thread__put(thread); + return -1; + } + + callchain_cursor_commit(cursor); + thread__put(thread); + + while (true) { + struct callchain_cursor_node *node; + + node = callchain_cursor_current(cursor); + if (node == NULL) + break; + + /* skip first few entries - for lock functions */ + if (++skip <= CONTENTION_STACK_SKIP) + goto next; + + sym = node->ms.sym; + if (sym && !is_lock_function(node->ip)) { + struct map *map = node->ms.map; + u64 offset; + + offset = map->map_ip(map, node->ip) - sym->start; + + if (offset) + scnprintf(buf, size, "%s+%#lx", sym->name, offset); + else + strlcpy(buf, sym->name, size); + return 0; + } + +next: + callchain_cursor_advance(cursor); + } + return -1; +} + +static int report_lock_contention_begin_event(struct evsel *evsel, + struct perf_sample *sample) +{ + struct lock_stat *ls; + struct thread_stat *ts; + struct lock_seq_stat *seq; + u64 addr = evsel__intval(evsel, sample, "lock_addr"); + + if (show_thread_stats) + addr = sample->tid; + + ls = lock_stat_find(addr); + if (!ls) { + char buf[128]; + const char *caller = buf; + + if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0) + caller = "Unknown"; + + ls = lock_stat_findnew(addr, caller); + if (!ls) + return -ENOMEM; + } + + ts = thread_stat_findnew(sample->tid); + if (!ts) + return -ENOMEM; + + seq = get_seq(ts, addr); + if (!seq) + return -ENOMEM; + 
+ switch (seq->state) { + case SEQ_STATE_UNINITIALIZED: + case SEQ_STATE_ACQUIRED: + break; + case SEQ_STATE_CONTENDED: + /* + * It can have nested contention begin with mutex spinning, + * then we would use the original contention begin event and + * ignore the second one. + */ + goto end; + case SEQ_STATE_ACQUIRING: + case SEQ_STATE_READ_ACQUIRED: + case SEQ_STATE_RELEASED: + /* broken lock sequence */ + if (!ls->broken) { + ls->broken = 1; + bad_hist[BROKEN_CONTENDED]++; + } + list_del_init(&seq->list); + free(seq); + goto end; + default: + BUG_ON("Unknown state of lock sequence found!\n"); + break; + } + + if (seq->state != SEQ_STATE_CONTENDED) { + seq->state = SEQ_STATE_CONTENDED; + seq->prev_event_time = sample->time; + ls->nr_contended++; + } +end: + return 0; +} + +static int report_lock_contention_end_event(struct evsel *evsel, + struct perf_sample *sample) +{ + struct lock_stat *ls; + struct thread_stat *ts; + struct lock_seq_stat *seq; + u64 contended_term; + u64 addr = evsel__intval(evsel, sample, "lock_addr"); + + if (show_thread_stats) + addr = sample->tid; + + ls = lock_stat_find(addr); + if (!ls) + return 0; + + ts = thread_stat_find(sample->tid); + if (!ts) + return 0; + + seq = get_seq(ts, addr); + if (!seq) + return -ENOMEM; + + switch (seq->state) { + case SEQ_STATE_UNINITIALIZED: + goto end; + case SEQ_STATE_CONTENDED: + contended_term = sample->time - seq->prev_event_time; + ls->wait_time_total += contended_term; + if (contended_term < ls->wait_time_min) + ls->wait_time_min = contended_term; + if (ls->wait_time_max < contended_term) + ls->wait_time_max = contended_term; + break; + case SEQ_STATE_ACQUIRING: + case SEQ_STATE_ACQUIRED: + case SEQ_STATE_READ_ACQUIRED: + case SEQ_STATE_RELEASED: + /* broken lock sequence */ + if (!ls->broken) { + ls->broken = 1; + bad_hist[BROKEN_ACQUIRED]++; + } + list_del_init(&seq->list); + free(seq); + goto end; + default: + BUG_ON("Unknown state of lock sequence found!\n"); + break; + } + + seq->state = 
SEQ_STATE_ACQUIRED; + ls->nr_acquired++; + ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired; +end: + return 0; +} + /* lock oriented handlers */ /* TODO: handlers for CPU oriented, thread oriented */ static struct trace_lock_handler report_lock_ops = { @@ -790,6 +1104,8 @@ static struct trace_lock_handler report_lock_ops = { .acquired_event = report_lock_acquired_event, .contended_event = report_lock_contended_event, .release_event = report_lock_release_event, + .contention_begin_event = report_lock_contention_begin_event, + .contention_end_event = report_lock_contention_end_event, }; static struct trace_lock_handler *trace_handler; @@ -822,13 +1138,34 @@ static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample * return 0; } +static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample) +{ + if (trace_handler->contention_begin_event) + return trace_handler->contention_begin_event(evsel, sample); + return 0; +} + +static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample) +{ + if (trace_handler->contention_end_event) + return trace_handler->contention_end_event(evsel, sample); + return 0; +} + static void print_bad_events(int bad, int total) { /* Output for debug, this have to be removed */ int i; + int broken = 0; const char *name[4] = { "acquire", "acquired", "contended", "release" }; + for (i = 0; i < BROKEN_MAX; i++) + broken += bad_hist[i]; + + if (broken == 0 && !verbose) + return; + pr_info("\n=== output for debug===\n\n"); pr_info("bad: %d, total: %d\n", bad, total); pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100); @@ -1023,6 +1360,11 @@ static const struct evsel_str_handler lock_tracepoints[] = { { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ }; +static const struct evsel_str_handler contention_tracepoints[] = { + { "lock:contention_begin", evsel__process_contention_begin, }, + { "lock:contention_end", 
evsel__process_contention_end, }, +}; + static bool force; static int __cmd_report(bool display_info) @@ -1031,6 +1373,7 @@ static int __cmd_report(bool display_info) struct perf_tool eops = { .sample = process_sample_event, .comm = perf_event__process_comm, + .mmap = perf_event__process_mmap, .namespaces = perf_event__process_namespaces, .ordered_events = true, }; @@ -1046,6 +1389,8 @@ static int __cmd_report(bool display_info) return PTR_ERR(session); } + /* for lock function check */ + symbol_conf.sort_by_name = true; symbol__init(&session->header.env); if (!perf_session__has_traces(session, "lock record")) @@ -1056,6 +1401,11 @@ static int __cmd_report(bool display_info) goto out_delete; } + if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + goto out_delete; + } + if (setup_output_field(output_fields)) goto out_delete; @@ -1085,21 +1435,48 @@ static int __cmd_record(int argc, const char **argv) const char *record_args[] = { "record", "-R", "-m", "1024", "-c", "1", "--synth", "task", }; + const char *callgraph_args[] = { + "--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH), + }; unsigned int rec_argc, i, j, ret; + unsigned int nr_tracepoints; + unsigned int nr_callgraph_args = 0; const char **rec_argv; + bool has_lock_stat = true; for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { if (!is_valid_tracepoint(lock_tracepoints[i].name)) { - pr_err("tracepoint %s is not enabled. " - "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", - lock_tracepoints[i].name); - return 1; + pr_debug("tracepoint %s is not enabled. 
" + "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", + lock_tracepoints[i].name); + has_lock_stat = false; + break; } } - rec_argc = ARRAY_SIZE(record_args) + argc - 1; + if (has_lock_stat) + goto setup_args; + + for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) { + if (!is_valid_tracepoint(contention_tracepoints[i].name)) { + pr_err("tracepoint %s is not enabled.\n", + contention_tracepoints[i].name); + return 1; + } + } + + nr_callgraph_args = ARRAY_SIZE(callgraph_args); + +setup_args: + rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1; + + if (has_lock_stat) + nr_tracepoints = ARRAY_SIZE(lock_tracepoints); + else + nr_tracepoints = ARRAY_SIZE(contention_tracepoints); + /* factor of 2 is for -e in front of each tracepoint */ - rec_argc += 2 * ARRAY_SIZE(lock_tracepoints); + rec_argc += 2 * nr_tracepoints; rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) @@ -1108,11 +1485,24 @@ static int __cmd_record(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); - for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) { + for (j = 0; j < nr_tracepoints; j++) { + const char *ev_name; + + if (has_lock_stat) + ev_name = strdup(lock_tracepoints[j].name); + else + ev_name = strdup(contention_tracepoints[j].name); + + if (!ev_name) + return -ENOMEM; + rec_argv[i++] = "-e"; - rec_argv[i++] = strdup(lock_tracepoints[j].name); + rec_argv[i++] = ev_name; } + for (j = 0; j < nr_callgraph_args; j++, i++) + rec_argv[i] = callgraph_args[j]; + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; @@ -1130,6 +1520,10 @@ int cmd_lock(int argc, const char **argv) OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_STRING(0, 
"kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), OPT_END() }; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 9a71f0330137..cf5c5379ceaa 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1388,6 +1388,11 @@ static struct perf_event_header finished_round_event = { .type = PERF_RECORD_FINISHED_ROUND, }; +static struct perf_event_header finished_init_event = { + .size = sizeof(struct perf_event_header), + .type = PERF_RECORD_FINISHED_INIT, +}; + static void record__adjust_affinity(struct record *rec, struct mmap *map) { if (rec->opts.affinity != PERF_AFFINITY_SYS && @@ -1696,6 +1701,14 @@ static int record__synthesize_workload(struct record *rec, bool tail) return err; } +static int write_finished_init(struct record *rec, bool tail) +{ + if (rec->opts.tail_synthesize != tail) + return 0; + + return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event)); +} + static int record__synthesize(struct record *rec, bool tail); static int @@ -1710,6 +1723,8 @@ record__switch_output(struct record *rec, bool at_exit) record__aio_mmap_read_sync(rec); + write_finished_init(rec, true); + record__synthesize(rec, true); if (target__none(&rec->opts.target)) record__synthesize_workload(rec, true); @@ -1764,6 +1779,7 @@ record__switch_output(struct record *rec, bool at_exit) */ if (target__none(&rec->opts.target)) record__synthesize_workload(rec, false); + write_finished_init(rec, false); } return fd; } @@ -1834,13 +1850,11 @@ static int record__synthesize(struct record *rec, bool tail) goto out; /* Synthesize id_index before auxtrace_info */ - if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) { - err = perf_event__synthesize_id_index(tool, - process_synthesized_event, - session->evlist, machine); - if (err) - goto out; - } + err = perf_event__synthesize_id_index(tool, + process_synthesized_event, + session->evlist, machine); + if (err) + goto out; if 
(rec->opts.full_auxtrace) { err = perf_event__synthesize_auxtrace_info(rec->itr, tool, @@ -2421,6 +2435,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) trigger_ready(&auxtrace_snapshot_trigger); trigger_ready(&switch_output_trigger); perf_hooks__invoke_record_start(); + + /* + * Must write FINISHED_INIT so it will be seen after all other + * synthesized user events, but before any regular events. + */ + err = write_finished_init(rec, false); + if (err < 0) + goto out_child; + for (;;) { unsigned long long hits = thread->samples; @@ -2565,6 +2588,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", record__waking(rec)); + write_finished_init(rec, true); + if (target__none(&rec->opts.target)) record__synthesize_workload(rec, true); @@ -3193,6 +3218,8 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size, "Record the sampled code address (ip) page size"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), + OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier, + "Record the sample identifier"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, "Record the sample timestamps"), @@ -3805,6 +3832,9 @@ int cmd_record(int argc, const char **argv) goto out_opts; } + if (rec->opts.kcore) + rec->opts.text_poke = true; + if (rec->opts.kcore || record__threads_enabled(rec)) rec->data.is_dir = true; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c689054002cc..7cf21ab16f4f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3633,6 +3633,9 @@ int process_thread_map_event(struct perf_session *session, struct perf_tool *tool = session->tool; struct perf_script *script = container_of(tool, struct perf_script, tool); + if (dump_trace) + 
perf_event__fprintf_thread_map(event, stdout); + if (script->threads) { pr_warning("Extra thread map event, ignoring.\n"); return 0; @@ -3652,6 +3655,9 @@ int process_cpu_map_event(struct perf_session *session, struct perf_tool *tool = session->tool; struct perf_script *script = container_of(tool, struct perf_script, tool); + if (dump_trace) + perf_event__fprintf_cpu_map(event, stdout); + if (script->cpus) { pr_warning("Extra cpu map event, ignoring.\n"); return 0; diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index a055dee6a46a..28a9d01b08af 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,7 +1,3 @@ -hostprogs := jevents - -jevents-y += json.o jsmn.o jevents.o -HOSTCFLAGS_jevents.o = -I$(srctree)/tools/include pmu-events-y += pmu-events.o JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ @@ -9,10 +5,19 @@ JSON = $(shell [ -d $(JDIR) ] && \ JDIR_TEST = pmu-events/arch/test JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \ find $(JDIR_TEST) -name '*.json') +JEVENTS_PY = pmu-events/jevents.py # # Locate/process JSON files in pmu-events/arch/ # directory and create tables in pmu-events.c. 
# -$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS) - $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V) + +ifeq ($(NO_JEVENTS),1) +$(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)cp $< $@ +else +$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) + $(call rule_mkdir) + $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(SRCARCH) pmu-events/arch $@ +endif diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c new file mode 100644 index 000000000000..77e655c6f116 --- /dev/null +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * An empty pmu-events.c file used when there are no architecture json files in + * arch or when the jevents.py script cannot be run. + * + * The test cpu/soc is provided for testing. + */ +#include "pmu-events/pmu-events.h" + +static const struct pmu_event pme_test_soc_cpu[] = { + { + .name = "l3_cache_rd", + .event = "event=0x40", + .desc = "L3 cache access, read", + .topic = "cache", + .long_desc = "Attributable Level 3 cache access, read", + }, + { + .name = "segment_reg_loads.any", + .event = "event=0x6,period=200000,umask=0x80", + .desc = "Number of segment register loads", + .topic = "other", + }, + { + .name = "dispatch_blocked.any", + .event = "event=0x9,period=200000,umask=0x20", + .desc = "Memory cluster signals to block micro-op dispatch for any reason", + .topic = "other", + }, + { + .name = "eist_trans", + .event = "event=0x3a,period=200000,umask=0x0", + .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", + .topic = "other", + }, + { + .name = "uncore_hisi_ddrc.flux_wcmd", + .event = "event=0x2", + .desc = "DDRC write commands. 
Unit: hisi_sccl,ddrc ", + .topic = "uncore", + .long_desc = "DDRC write commands", + .pmu = "hisi_sccl,ddrc", + }, + { + .name = "unc_cbo_xsnp_response.miss_eviction", + .event = "event=0x22,umask=0x81", + .desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core. Unit: uncore_cbox ", + .topic = "uncore", + .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core", + .pmu = "uncore_cbox", + }, + { + .name = "event-hyphen", + .event = "event=0xe0,umask=0x00", + .desc = "UNC_CBO_HYPHEN. Unit: uncore_cbox ", + .topic = "uncore", + .long_desc = "UNC_CBO_HYPHEN", + .pmu = "uncore_cbox", + }, + { + .name = "event-two-hyph", + .event = "event=0xc0,umask=0x00", + .desc = "UNC_CBO_TWO_HYPH. Unit: uncore_cbox ", + .topic = "uncore", + .long_desc = "UNC_CBO_TWO_HYPH", + .pmu = "uncore_cbox", + }, + { + .name = "uncore_hisi_l3c.rd_hit_cpipe", + .event = "event=0x7", + .desc = "Total read hits. Unit: hisi_sccl,l3c ", + .topic = "uncore", + .long_desc = "Total read hits", + .pmu = "hisi_sccl,l3c", + }, + { + .name = "uncore_imc_free_running.cache_miss", + .event = "event=0x12", + .desc = "Total cache misses. Unit: uncore_imc_free_running ", + .topic = "uncore", + .long_desc = "Total cache misses", + .pmu = "uncore_imc_free_running", + }, + { + .name = "uncore_imc.cache_hits", + .event = "event=0x34", + .desc = "Total cache hits. 
Unit: uncore_imc ", + .topic = "uncore", + .long_desc = "Total cache hits", + .pmu = "uncore_imc", + }, + { + .name = "bp_l1_btb_correct", + .event = "event=0x8a", + .desc = "L1 BTB Correction", + .topic = "branch", + }, + { + .name = "bp_l2_btb_correct", + .event = "event=0x8b", + .desc = "L2 BTB Correction", + .topic = "branch", + }, + { + .name = 0, + .event = 0, + .desc = 0, + }, +}; + +const struct pmu_events_map pmu_events_map[] = { + { + .cpuid = "testcpu", + .version = "v1", + .type = "core", + .table = pme_test_soc_cpu, + }, + { + .cpuid = 0, + .version = 0, + .type = 0, + .table = 0, + }, +}; + +static const struct pmu_event pme_test_soc_sys[] = { + { + .name = "sys_ddr_pmu.write_cycles", + .event = "event=0x2b", + .desc = "ddr write-cycles event. Unit: uncore_sys_ddr_pmu ", + .compat = "v8", + .topic = "uncore", + .pmu = "uncore_sys_ddr_pmu", + }, + { + .name = "sys_ccn_pmu.read_cycles", + .event = "config=0x2c", + .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ", + .compat = "0x01", + .topic = "uncore", + .pmu = "uncore_sys_ccn_pmu", + }, + { + .name = 0, + .event = 0, + .desc = 0, + }, +}; + +const struct pmu_sys_events pmu_sys_event_tables[] = { + { + .table = pme_test_soc_sys, + .name = "pme_test_soc_sys", + }, + { + .table = 0 + }, +}; diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c deleted file mode 100644 index e597e4bac90f..000000000000 --- a/tools/perf/pmu-events/jevents.c +++ /dev/null @@ -1,1342 +0,0 @@ -#define _XOPEN_SOURCE 500 /* needed for nftw() */ -#define _GNU_SOURCE /* needed for asprintf() */ - -/* Parse event JSON files */ - -/* - * Copyright (c) 2014, Intel Corporation - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> -#include <ctype.h> -#include <unistd.h> -#include <stdarg.h> -#include <libgen.h> -#include <limits.h> -#include <dirent.h> -#include <sys/time.h> /* getrlimit */ -#include <sys/resource.h> /* getrlimit */ -#include <ftw.h> -#include <sys/stat.h> -#include <linux/compiler.h> -#include <linux/list.h> -#include "jsmn.h" -#include "json.h" -#include "pmu-events.h" - -int verbose; -char *prog; - -struct json_event { - char *name; - char *compat; - char *event; - char *desc; - char *long_desc; - char *pmu; - char *unit; - char *perpkg; - char *aggr_mode; - char *metric_expr; - char *metric_name; - char *metric_group; - char *deprecated; - char *metric_constraint; -}; - -static enum aggr_mode_class convert(const char *aggr_mode) -{ - if (!strcmp(aggr_mode, "PerCore")) - return PerCore; - else if (!strcmp(aggr_mode, "PerChip")) - return PerChip; - - pr_err("%s: Wrong AggregationMode value '%s'\n", prog, aggr_mode); - return -1; -} - -static LIST_HEAD(sys_event_tables); - -struct sys_event_table { - struct list_head list; - char *soc_id; -}; - -static void free_sys_event_tables(void) -{ - struct sys_event_table *et, *next; - - list_for_each_entry_safe(et, next, &sys_event_tables, list) { - free(et->soc_id); - free(et); - } -} - -int eprintf(int level, int var, const char *fmt, ...) -{ - - int ret; - va_list args; - - if (var < level) - return 0; - - va_start(args, fmt); - - ret = vfprintf(stderr, fmt, args); - - va_end(args); - - return ret; -} - -static void addfield(char *map, char **dst, const char *sep, - const char *a, jsmntok_t *bt) -{ - unsigned int len = strlen(a) + 1 + strlen(sep); - int olen = *dst ? strlen(*dst) : 0; - int blen = bt ? 
json_len(bt) : 0; - char *out; - - out = realloc(*dst, len + olen + blen); - if (!out) { - /* Don't add field in this case */ - return; - } - *dst = out; - - if (!olen) - *(*dst) = 0; - else - strcat(*dst, sep); - strcat(*dst, a); - if (bt) - strncat(*dst, map + bt->start, blen); -} - -static void fixname(char *s) -{ - for (; *s; s++) - *s = tolower(*s); -} - -static void fixdesc(char *s) -{ - char *e = s + strlen(s); - - /* Remove trailing dots that look ugly in perf list */ - --e; - while (e >= s && isspace(*e)) - --e; - if (*e == '.') - *e = 0; -} - -/* Add escapes for '\' so they are proper C strings. */ -static char *fixregex(char *s) -{ - int len = 0; - int esc_count = 0; - char *fixed = NULL; - char *p, *q; - - /* Count the number of '\' in string */ - for (p = s; *p; p++) { - ++len; - if (*p == '\\') - ++esc_count; - } - - if (esc_count == 0) - return s; - - /* allocate space for a new string */ - fixed = (char *) malloc(len + esc_count + 1); - if (!fixed) - return NULL; - - /* copy over the characters */ - q = fixed; - for (p = s; *p; p++) { - if (*p == '\\') { - *q = '\\'; - ++q; - } - *q = *p; - ++q; - } - *q = '\0'; - return fixed; -} - -static struct msrmap { - const char *num; - const char *pname; -} msrmap[] = { - { "0x3F6", "ldlat=" }, - { "0x1A6", "offcore_rsp=" }, - { "0x1A7", "offcore_rsp=" }, - { "0x3F7", "frontend=" }, - { NULL, NULL } -}; - -static void cut_comma(char *map, jsmntok_t *newval) -{ - int i; - - /* Cut off everything after comma */ - for (i = newval->start; i < newval->end; i++) { - if (map[i] == ',') - newval->end = i; - } -} - -static struct msrmap *lookup_msr(char *map, jsmntok_t *val) -{ - jsmntok_t newval = *val; - static bool warned; - int i; - - cut_comma(map, &newval); - for (i = 0; msrmap[i].num; i++) - if (json_streq(map, &newval, msrmap[i].num)) - return &msrmap[i]; - if (!warned) { - warned = true; - pr_err("%s: Unknown MSR in event file %.*s\n", prog, - json_len(val), map + val->start); - } - return NULL; -} - -static 
struct map { - const char *json; - const char *perf; -} unit_to_pmu[] = { - { "CBO", "uncore_cbox" }, - { "QPI LL", "uncore_qpi" }, - { "SBO", "uncore_sbox" }, - { "iMPH-U", "uncore_arb" }, - { "CPU-M-CF", "cpum_cf" }, - { "CPU-M-SF", "cpum_sf" }, - { "UPI LL", "uncore_upi" }, - { "hisi_sicl,cpa", "hisi_sicl,cpa"}, - { "hisi_sccl,ddrc", "hisi_sccl,ddrc" }, - { "hisi_sccl,hha", "hisi_sccl,hha" }, - { "hisi_sccl,l3c", "hisi_sccl,l3c" }, - /* it's not realistic to keep adding these, we need something more scalable ... */ - { "imx8_ddr", "imx8_ddr" }, - { "L3PMC", "amd_l3" }, - { "DFPMC", "amd_df" }, - { "cpu_core", "cpu_core" }, - { "cpu_atom", "cpu_atom" }, - {} -}; - -static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val) -{ - int i; - - for (i = 0; table[i].json; i++) { - if (json_streq(map, val, table[i].json)) - return table[i].perf; - } - return NULL; -} - -#define EXPECT(e, t, m) do { if (!(e)) { \ - jsmntok_t *loc = (t); \ - if (!(t)->start && (t) > tokens) \ - loc = (t) - 1; \ - pr_err("%s:%d: " m ", got %s\n", fn, \ - json_line(map, loc), \ - json_name(t)); \ - err = -EIO; \ - goto out_free; \ -} } while (0) - -static char *topic; - -static char *get_topic(void) -{ - char *tp; - int i; - - /* tp is free'd in process_one_file() */ - i = asprintf(&tp, "%s", topic); - if (i < 0) { - pr_info("%s: asprintf() error %s\n", prog); - return NULL; - } - - for (i = 0; i < (int) strlen(tp); i++) { - char c = tp[i]; - - if (c == '-') - tp[i] = ' '; - else if (c == '.') { - tp[i] = '\0'; - break; - } - } - - return tp; -} - -static int add_topic(char *bname) -{ - free(topic); - topic = strdup(bname); - if (!topic) { - pr_info("%s: strdup() error %s for file %s\n", prog, - strerror(errno), bname); - return -ENOMEM; - } - return 0; -} - -struct perf_entry_data { - FILE *outfp; - char *topic; -}; - -static int close_table; - -static void print_events_table_prefix(FILE *fp, const char *tblname) -{ - fprintf(fp, "static const struct pmu_event %s[] = 
{\n", tblname); - close_table = 1; -} - -static int print_events_table_entry(void *data, struct json_event *je) -{ - struct perf_entry_data *pd = data; - FILE *outfp = pd->outfp; - char *topic_local = pd->topic; - - /* - * TODO: Remove formatting chars after debugging to reduce - * string lengths. - */ - fprintf(outfp, "{\n"); - - if (je->name) - fprintf(outfp, "\t.name = \"%s\",\n", je->name); - if (je->event) - fprintf(outfp, "\t.event = \"%s\",\n", je->event); - fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); - if (je->compat) - fprintf(outfp, "\t.compat = \"%s\",\n", je->compat); - fprintf(outfp, "\t.topic = \"%s\",\n", topic_local); - if (je->long_desc && je->long_desc[0]) - fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); - if (je->pmu) - fprintf(outfp, "\t.pmu = \"%s\",\n", je->pmu); - if (je->unit) - fprintf(outfp, "\t.unit = \"%s\",\n", je->unit); - if (je->perpkg) - fprintf(outfp, "\t.perpkg = \"%s\",\n", je->perpkg); - if (je->aggr_mode) - fprintf(outfp, "\t.aggr_mode = \"%d\",\n", convert(je->aggr_mode)); - if (je->metric_expr) - fprintf(outfp, "\t.metric_expr = \"%s\",\n", je->metric_expr); - if (je->metric_name) - fprintf(outfp, "\t.metric_name = \"%s\",\n", je->metric_name); - if (je->metric_group) - fprintf(outfp, "\t.metric_group = \"%s\",\n", je->metric_group); - if (je->deprecated) - fprintf(outfp, "\t.deprecated = \"%s\",\n", je->deprecated); - if (je->metric_constraint) - fprintf(outfp, "\t.metric_constraint = \"%s\",\n", je->metric_constraint); - fprintf(outfp, "},\n"); - - return 0; -} - -struct event_struct { - struct list_head list; - char *name; - char *event; - char *compat; - char *desc; - char *long_desc; - char *pmu; - char *unit; - char *perpkg; - char *aggr_mode; - char *metric_expr; - char *metric_name; - char *metric_group; - char *deprecated; - char *metric_constraint; -}; - -#define ADD_EVENT_FIELD(field) do { if (je->field) { \ - es->field = strdup(je->field); \ - if (!es->field) \ - goto out_free; \ -} } while (0) - 
-#define FREE_EVENT_FIELD(field) free(es->field) - -#define TRY_FIXUP_FIELD(field) do { if (es->field && !je->field) {\ - je->field = strdup(es->field); \ - if (!je->field) \ - return -ENOMEM; \ -} } while (0) - -#define FOR_ALL_EVENT_STRUCT_FIELDS(op) do { \ - op(name); \ - op(event); \ - op(desc); \ - op(long_desc); \ - op(pmu); \ - op(unit); \ - op(perpkg); \ - op(aggr_mode); \ - op(metric_expr); \ - op(metric_name); \ - op(metric_group); \ - op(deprecated); \ -} while (0) - -static LIST_HEAD(arch_std_events); - -static void free_arch_std_events(void) -{ - struct event_struct *es, *next; - - list_for_each_entry_safe(es, next, &arch_std_events, list) { - FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD); - list_del_init(&es->list); - free(es); - } -} - -static int save_arch_std_events(void *data __maybe_unused, struct json_event *je) -{ - struct event_struct *es; - - es = malloc(sizeof(*es)); - if (!es) - return -ENOMEM; - memset(es, 0, sizeof(*es)); - FOR_ALL_EVENT_STRUCT_FIELDS(ADD_EVENT_FIELD); - list_add_tail(&es->list, &arch_std_events); - return 0; -out_free: - FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD); - free(es); - return -ENOMEM; -} - -static void print_events_table_suffix(FILE *outfp) -{ - fprintf(outfp, "{\n"); - - fprintf(outfp, "\t.name = 0,\n"); - fprintf(outfp, "\t.event = 0,\n"); - fprintf(outfp, "\t.desc = 0,\n"); - - fprintf(outfp, "},\n"); - fprintf(outfp, "};\n"); - close_table = 0; -} - -static struct fixed { - const char *name; - const char *event; -} fixed[] = { - { "inst_retired.any", "event=0xc0,period=2000003" }, - { "inst_retired.any_p", "event=0xc0,period=2000003" }, - { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" }, - { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" }, - { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" }, - { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" }, - { NULL, NULL}, -}; - -/* - * Handle different fixed counter encodings between JSON and perf. 
- */ -static char *real_event(const char *name, char *event) -{ - int i; - - if (!name) - return NULL; - - for (i = 0; fixed[i].name; i++) - if (!strcasecmp(name, fixed[i].name)) - return (char *)fixed[i].event; - return event; -} - -static int -try_fixup(const char *fn, char *arch_std, struct json_event *je, char **event) -{ - /* try to find matching event from arch standard values */ - struct event_struct *es; - - list_for_each_entry(es, &arch_std_events, list) { - if (!strcmp(arch_std, es->name)) { - FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD); - *event = je->event; - return 0; - } - } - - pr_err("%s: could not find matching %s for %s\n", - prog, arch_std, fn); - return -1; -} - -/* Call func with each event in the json file */ -static int json_events(const char *fn, - int (*func)(void *data, struct json_event *je), - void *data) -{ - int err; - size_t size; - jsmntok_t *tokens, *tok; - int i, j, len; - char *map; - char buf[128]; - - if (!fn) - return -ENOENT; - - tokens = parse_json(fn, &map, &size, &len); - if (!tokens) - return -EIO; - EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array"); - tok = tokens + 1; - for (i = 0; i < tokens->size; i++) { - char *event = NULL; - char *extra_desc = NULL; - char *filter = NULL; - struct json_event je = {}; - char *arch_std = NULL; - unsigned long long eventcode = 0; - unsigned long long configcode = 0; - struct msrmap *msr = NULL; - jsmntok_t *msrval = NULL; - jsmntok_t *precise = NULL; - jsmntok_t *obj = tok++; - bool configcode_present = false; - char *umask = NULL; - char *cmask = NULL; - char *inv = NULL; - char *any = NULL; - char *edge = NULL; - char *period = NULL; - char *fc_mask = NULL; - char *ch_mask = NULL; - - EXPECT(obj->type == JSMN_OBJECT, obj, "expected object"); - for (j = 0; j < obj->size; j += 2) { - jsmntok_t *field, *val; - int nz; - char *s; - - field = tok + j; - EXPECT(field->type == JSMN_STRING, tok + j, - "Expected field name"); - val = tok + j + 1; - EXPECT(val->type == 
JSMN_STRING, tok + j + 1, - "Expected string value"); - - nz = !json_streq(map, val, "0"); - /* match_field */ - if (json_streq(map, field, "UMask") && nz) { - addfield(map, &umask, "", "umask=", val); - } else if (json_streq(map, field, "CounterMask") && nz) { - addfield(map, &cmask, "", "cmask=", val); - } else if (json_streq(map, field, "Invert") && nz) { - addfield(map, &inv, "", "inv=", val); - } else if (json_streq(map, field, "AnyThread") && nz) { - addfield(map, &any, "", "any=", val); - } else if (json_streq(map, field, "EdgeDetect") && nz) { - addfield(map, &edge, "", "edge=", val); - } else if (json_streq(map, field, "SampleAfterValue") && nz) { - addfield(map, &period, "", "period=", val); - } else if (json_streq(map, field, "FCMask") && nz) { - addfield(map, &fc_mask, "", "fc_mask=", val); - } else if (json_streq(map, field, "PortMask") && nz) { - addfield(map, &ch_mask, "", "ch_mask=", val); - } else if (json_streq(map, field, "EventCode")) { - char *code = NULL; - addfield(map, &code, "", "", val); - eventcode |= strtoul(code, NULL, 0); - free(code); - } else if (json_streq(map, field, "ConfigCode")) { - char *code = NULL; - addfield(map, &code, "", "", val); - configcode |= strtoul(code, NULL, 0); - free(code); - configcode_present = true; - } else if (json_streq(map, field, "ExtSel")) { - char *code = NULL; - addfield(map, &code, "", "", val); - eventcode |= strtoul(code, NULL, 0) << 8; - free(code); - } else if (json_streq(map, field, "EventName")) { - addfield(map, &je.name, "", "", val); - } else if (json_streq(map, field, "Compat")) { - addfield(map, &je.compat, "", "", val); - } else if (json_streq(map, field, "BriefDescription")) { - addfield(map, &je.desc, "", "", val); - fixdesc(je.desc); - } else if (json_streq(map, field, - "PublicDescription")) { - addfield(map, &je.long_desc, "", "", val); - fixdesc(je.long_desc); - } else if (json_streq(map, field, "PEBS") && nz) { - precise = val; - } else if (json_streq(map, field, "MSRIndex") && nz) 
{ - msr = lookup_msr(map, val); - } else if (json_streq(map, field, "MSRValue")) { - msrval = val; - } else if (json_streq(map, field, "Errata") && - !json_streq(map, val, "null")) { - addfield(map, &extra_desc, ". ", - " Spec update: ", val); - } else if (json_streq(map, field, "Data_LA") && nz) { - addfield(map, &extra_desc, ". ", - " Supports address when precise", - NULL); - } else if (json_streq(map, field, "Unit")) { - const char *ppmu; - - ppmu = field_to_perf(unit_to_pmu, map, val); - if (ppmu) { - je.pmu = strdup(ppmu); - } else { - if (!je.pmu) - je.pmu = strdup("uncore_"); - addfield(map, &je.pmu, "", "", val); - for (s = je.pmu; *s; s++) - *s = tolower(*s); - } - } else if (json_streq(map, field, "Filter")) { - addfield(map, &filter, "", "", val); - } else if (json_streq(map, field, "ScaleUnit")) { - addfield(map, &je.unit, "", "", val); - } else if (json_streq(map, field, "PerPkg")) { - addfield(map, &je.perpkg, "", "", val); - } else if (json_streq(map, field, "AggregationMode")) { - addfield(map, &je.aggr_mode, "", "", val); - } else if (json_streq(map, field, "Deprecated")) { - addfield(map, &je.deprecated, "", "", val); - } else if (json_streq(map, field, "MetricName")) { - addfield(map, &je.metric_name, "", "", val); - } else if (json_streq(map, field, "MetricGroup")) { - addfield(map, &je.metric_group, "", "", val); - } else if (json_streq(map, field, "MetricConstraint")) { - addfield(map, &je.metric_constraint, "", "", val); - } else if (json_streq(map, field, "MetricExpr")) { - addfield(map, &je.metric_expr, "", "", val); - } else if (json_streq(map, field, "ArchStdEvent")) { - addfield(map, &arch_std, "", "", val); - for (s = arch_std; *s; s++) - *s = tolower(*s); - } - /* ignore unknown fields */ - } - if (precise && je.desc && !strstr(je.desc, "(Precise Event)")) { - if (json_streq(map, precise, "2")) - addfield(map, &extra_desc, " ", - "(Must be precise)", NULL); - else - addfield(map, &extra_desc, " ", - "(Precise event)", NULL); - } - if 
(configcode_present) - snprintf(buf, sizeof buf, "config=%#llx", configcode); - else - snprintf(buf, sizeof buf, "event=%#llx", eventcode); - addfield(map, &event, ",", buf, NULL); - if (any) - addfield(map, &event, ",", any, NULL); - if (ch_mask) - addfield(map, &event, ",", ch_mask, NULL); - if (cmask) - addfield(map, &event, ",", cmask, NULL); - if (edge) - addfield(map, &event, ",", edge, NULL); - if (fc_mask) - addfield(map, &event, ",", fc_mask, NULL); - if (inv) - addfield(map, &event, ",", inv, NULL); - if (period) - addfield(map, &event, ",", period, NULL); - if (umask) - addfield(map, &event, ",", umask, NULL); - - if (je.desc && extra_desc) - addfield(map, &je.desc, " ", extra_desc, NULL); - if (je.long_desc && extra_desc) - addfield(map, &je.long_desc, " ", extra_desc, NULL); - if (je.pmu) { - addfield(map, &je.desc, ". ", "Unit: ", NULL); - addfield(map, &je.desc, "", je.pmu, NULL); - addfield(map, &je.desc, "", " ", NULL); - } - if (filter) - addfield(map, &event, ",", filter, NULL); - if (msr != NULL) - addfield(map, &event, ",", msr->pname, msrval); - if (je.name) - fixname(je.name); - - if (arch_std) { - /* - * An arch standard event is referenced, so try to - * fixup any unassigned values. 
- */ - err = try_fixup(fn, arch_std, &je, &event); - if (err) - goto free_strings; - } - je.event = real_event(je.name, event); - err = func(data, &je); -free_strings: - free(umask); - free(cmask); - free(inv); - free(any); - free(edge); - free(period); - free(fc_mask); - free(ch_mask); - free(event); - free(je.desc); - free(je.name); - free(je.compat); - free(je.long_desc); - free(extra_desc); - free(je.pmu); - free(filter); - free(je.perpkg); - free(je.aggr_mode); - free(je.deprecated); - free(je.unit); - free(je.metric_expr); - free(je.metric_name); - free(je.metric_group); - free(je.metric_constraint); - free(arch_std); - - if (err) - break; - tok += j; - } - EXPECT(tok - tokens == len, tok, "unexpected objects at end"); - err = 0; -out_free: - free_json(map, size, tokens); - return err; -} - -static char *file_name_to_table_name(char *fname) -{ - unsigned int i; - int n; - int c; - char *tblname; - - /* - * Ensure tablename starts with alphabetic character. - * Derive rest of table name from basename of the JSON file, - * replacing hyphens and stripping out .json suffix. 
- */ - n = asprintf(&tblname, "pme_%s", fname); - if (n < 0) { - pr_info("%s: asprintf() error %s for file %s\n", prog, - strerror(errno), fname); - return NULL; - } - - for (i = 0; i < strlen(tblname); i++) { - c = tblname[i]; - - if (c == '-' || c == '/') - tblname[i] = '_'; - else if (c == '.') { - tblname[i] = '\0'; - break; - } else if (!isalnum(c) && c != '_') { - pr_err("%s: Invalid character '%c' in file name %s\n", - prog, c, basename(fname)); - free(tblname); - tblname = NULL; - break; - } - } - - return tblname; -} - -static bool is_sys_dir(char *fname) -{ - size_t len = strlen(fname), len2 = strlen("/sys"); - - if (len2 > len) - return false; - return !strcmp(fname+len-len2, "/sys"); -} - -static void print_mapping_table_prefix(FILE *outfp) -{ - fprintf(outfp, "const struct pmu_events_map pmu_events_map[] = {\n"); -} - -static void print_mapping_table_suffix(FILE *outfp) -{ - /* - * Print the terminating, NULL entry. - */ - fprintf(outfp, "{\n"); - fprintf(outfp, "\t.cpuid = 0,\n"); - fprintf(outfp, "\t.version = 0,\n"); - fprintf(outfp, "\t.type = 0,\n"); - fprintf(outfp, "\t.table = 0,\n"); - fprintf(outfp, "},\n"); - - /* and finally, the closing curly bracket for the struct */ - fprintf(outfp, "};\n"); -} - -static void print_mapping_test_table(FILE *outfp) -{ - /* - * Print the terminating, NULL entry. 
- */ - fprintf(outfp, "{\n"); - fprintf(outfp, "\t.cpuid = \"testcpu\",\n"); - fprintf(outfp, "\t.version = \"v1\",\n"); - fprintf(outfp, "\t.type = \"core\",\n"); - fprintf(outfp, "\t.table = pme_test_soc_cpu,\n"); - fprintf(outfp, "},\n"); -} - -static void print_system_event_mapping_table_prefix(FILE *outfp) -{ - fprintf(outfp, "\nconst struct pmu_sys_events pmu_sys_event_tables[] = {"); -} - -static void print_system_event_mapping_table_suffix(FILE *outfp) -{ - fprintf(outfp, "\n\t{\n\t\t.table = 0\n\t},"); - fprintf(outfp, "\n};\n"); -} - -static int process_system_event_tables(FILE *outfp) -{ - struct sys_event_table *sys_event_table; - - print_system_event_mapping_table_prefix(outfp); - - list_for_each_entry(sys_event_table, &sys_event_tables, list) { - fprintf(outfp, "\n\t{\n\t\t.table = %s,\n\t\t.name = \"%s\",\n\t},", - sys_event_table->soc_id, - sys_event_table->soc_id); - } - - print_system_event_mapping_table_suffix(outfp); - - return 0; -} - -static int process_mapfile(FILE *outfp, char *fpath) -{ - int n = 16384; - FILE *mapfp; - char *save = NULL; - char *line, *p; - int line_num; - char *tblname; - int ret = 0; - - pr_info("%s: Processing mapfile %s\n", prog, fpath); - - line = malloc(n); - if (!line) - return -1; - - mapfp = fopen(fpath, "r"); - if (!mapfp) { - pr_info("%s: Error %s opening %s\n", prog, strerror(errno), - fpath); - free(line); - return -1; - } - - print_mapping_table_prefix(outfp); - - /* Skip first line (header) */ - p = fgets(line, n, mapfp); - if (!p) - goto out; - - line_num = 1; - while (1) { - char *cpuid, *version, *type, *fname; - - line_num++; - p = fgets(line, n, mapfp); - if (!p) - break; - - if (line[0] == '#' || line[0] == '\n') - continue; - - if (line[strlen(line)-1] != '\n') { - /* TODO Deal with lines longer than 16K */ - pr_info("%s: Mapfile %s: line %d too long, aborting\n", - prog, fpath, line_num); - ret = -1; - goto out; - } - line[strlen(line)-1] = '\0'; - - cpuid = fixregex(strtok_r(p, ",", &save)); - 
version = strtok_r(NULL, ",", &save); - fname = strtok_r(NULL, ",", &save); - type = strtok_r(NULL, ",", &save); - - tblname = file_name_to_table_name(fname); - fprintf(outfp, "{\n"); - fprintf(outfp, "\t.cpuid = \"%s\",\n", cpuid); - fprintf(outfp, "\t.version = \"%s\",\n", version); - fprintf(outfp, "\t.type = \"%s\",\n", type); - - /* - * CHECK: We can't use the type (eg "core") field in the - * table name. For us to do that, we need to somehow tweak - * the other caller of file_name_to_table(), process_json() - * to determine the type. process_json() file has no way - * of knowing these are "core" events unless file name has - * core in it. If filename has core in it, we can safely - * ignore the type field here also. - */ - fprintf(outfp, "\t.table = %s\n", tblname); - fprintf(outfp, "},\n"); - } - -out: - print_mapping_test_table(outfp); - print_mapping_table_suffix(outfp); - fclose(mapfp); - free(line); - return ret; -} - -/* - * If we fail to locate/process JSON and map files, create a NULL mapping - * table. This would at least allow perf to build even if we can't find/use - * the aliases. - */ -static void create_empty_mapping(const char *output_file) -{ - FILE *outfp; - - pr_info("%s: Creating empty pmu_events_map[] table\n", prog); - - /* Truncate file to clear any partial writes to it */ - outfp = fopen(output_file, "w"); - if (!outfp) { - perror("fopen()"); - _Exit(1); - } - - fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n"); - print_mapping_table_prefix(outfp); - print_mapping_table_suffix(outfp); - print_system_event_mapping_table_prefix(outfp); - print_system_event_mapping_table_suffix(outfp); - fclose(outfp); -} - -static int get_maxfds(void) -{ - struct rlimit rlim; - - if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) - return min(rlim.rlim_max / 2, (rlim_t)512); - - return 512; -} - -/* - * nftw() doesn't let us pass an argument to the processing function, - * so use a global variables. 
- */ -static FILE *eventsfp; -static char *mapfile; - -static int is_leaf_dir(const char *fpath) -{ - DIR *d; - struct dirent *dir; - int res = 1; - - d = opendir(fpath); - if (!d) - return 0; - - while ((dir = readdir(d)) != NULL) { - if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, "..")) - continue; - - if (dir->d_type == DT_DIR) { - res = 0; - break; - } else if (dir->d_type == DT_UNKNOWN) { - char path[PATH_MAX]; - struct stat st; - - sprintf(path, "%s/%s", fpath, dir->d_name); - if (stat(path, &st)) - break; - - if (S_ISDIR(st.st_mode)) { - res = 0; - break; - } - } - } - - closedir(d); - - return res; -} - -static int is_json_file(const char *name) -{ - const char *suffix; - - if (strlen(name) < 5) - return 0; - - suffix = name + strlen(name) - 5; - - if (strncmp(suffix, ".json", 5) == 0) - return 1; - return 0; -} - -static int preprocess_arch_std_files(const char *fpath, const struct stat *sb, - int typeflag, struct FTW *ftwbuf) -{ - int level = ftwbuf->level; - int is_file = typeflag == FTW_F; - - if (level == 1 && is_file && is_json_file(fpath)) - return json_events(fpath, save_arch_std_events, (void *)sb); - - return 0; -} - -static int process_one_file(const char *fpath, const struct stat *sb, - int typeflag, struct FTW *ftwbuf) -{ - char *tblname, *bname; - int is_dir = typeflag == FTW_D; - int is_file = typeflag == FTW_F; - int level = ftwbuf->level; - int err = 0; - - if (level >= 2 && is_dir) { - int count = 0; - /* - * For level 2 directory, bname will include parent name, - * like vendor/platform. So search back from platform dir - * to find this. - * Something similar for level 3 directory, but we're a PMU - * category folder, like vendor/platform/cpu. - */ - bname = (char *) fpath + ftwbuf->base - 2; - for (;;) { - if (*bname == '/') - count++; - if (count == level - 1) - break; - bname--; - } - bname++; - } else - bname = (char *) fpath + ftwbuf->base; - - pr_debug("%s %d %7jd %-20s %s\n", - is_file ? "f" : is_dir ? 
"d" : "x", - level, sb->st_size, bname, fpath); - - /* base dir or too deep */ - if (level == 0 || level > 4) - return 0; - - - /* model directory, reset topic */ - if ((level == 1 && is_dir && is_leaf_dir(fpath)) || - (level >= 2 && is_dir && is_leaf_dir(fpath))) { - if (close_table) - print_events_table_suffix(eventsfp); - - /* - * Drop file name suffix. Replace hyphens with underscores. - * Fail if file name contains any alphanum characters besides - * underscores. - */ - tblname = file_name_to_table_name(bname); - if (!tblname) { - pr_info("%s: Error determining table name for %s\n", prog, - bname); - return -1; - } - - if (is_sys_dir(bname)) { - struct sys_event_table *sys_event_table; - - sys_event_table = malloc(sizeof(*sys_event_table)); - if (!sys_event_table) - return -1; - - sys_event_table->soc_id = strdup(tblname); - if (!sys_event_table->soc_id) { - free(sys_event_table); - return -1; - } - list_add_tail(&sys_event_table->list, - &sys_event_tables); - } - - print_events_table_prefix(eventsfp, tblname); - return 0; - } - - /* - * Save the mapfile name for now. We will process mapfile - * after processing all JSON files (so we can write out the - * mapping table after all PMU events tables). - * - */ - if (level == 1 && is_file) { - if (!strcmp(bname, "mapfile.csv")) { - mapfile = strdup(fpath); - return 0; - } - if (is_json_file(bname)) - pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath); - else - pr_info("%s: Ignoring file %s\n", prog, fpath); - return 0; - } - - /* - * If the file name does not have a .json extension, - * ignore it. It could be a readme.txt for instance. - */ - if (is_file) { - if (!is_json_file(bname)) { - pr_info("%s: Ignoring file without .json suffix %s\n", prog, - fpath); - return 0; - } - } - - if (level > 1 && add_topic(bname)) - return -ENOMEM; - - /* - * Assume all other files are JSON files. - * - * If mapfile refers to 'power7_core.json', we create a table - * named 'power7_core'. 
Any inconsistencies between the mapfile - * and directory tree could result in build failure due to table - * names not being found. - * - * At least for now, be strict with processing JSON file names. - * i.e. if JSON file name cannot be mapped to C-style table name, - * fail. - */ - if (is_file) { - struct perf_entry_data data = { - .topic = get_topic(), - .outfp = eventsfp, - }; - - err = json_events(fpath, print_events_table_entry, &data); - - free(data.topic); - } - - return err; -} - -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -/* - * Starting in directory 'start_dirname', find the "mapfile.csv" and - * the set of JSON files for the architecture 'arch'. - * - * From each JSON file, create a C-style "PMU events table" from the - * JSON file (see struct pmu_event). - * - * From the mapfile, create a mapping between the CPU revisions and - * PMU event tables (see struct pmu_events_map). - * - * Write out the PMU events tables and the mapping table to pmu-event.c. - */ -int main(int argc, char *argv[]) -{ - int rc, ret = 0, empty_map = 0; - int maxfds; - char ldirname[PATH_MAX]; - const char *arch; - const char *output_file; - const char *start_dirname; - const char *err_string_ext = ""; - struct stat stbuf; - - prog = basename(argv[0]); - if (argc < 4) { - pr_err("Usage: %s <arch> <starting_dir> <output_file>\n", prog); - return 1; - } - - arch = argv[1]; - start_dirname = argv[2]; - output_file = argv[3]; - - if (argc > 4) - verbose = atoi(argv[4]); - - eventsfp = fopen(output_file, "w"); - if (!eventsfp) { - pr_err("%s Unable to create required file %s (%s)\n", - prog, output_file, strerror(errno)); - return 2; - } - - sprintf(ldirname, "%s/%s", start_dirname, arch); - - /* If architecture does not have any event lists, bail out */ - if (stat(ldirname, &stbuf) < 0) { - pr_info("%s: Arch %s has no PMU event lists\n", prog, arch); - empty_map = 1; - goto err_close_eventsfp; - } - - /* Include pmu-events.h first */ - fprintf(eventsfp, "#include 
\"pmu-events/pmu-events.h\"\n"); - - /* - * The mapfile allows multiple CPUids to point to the same JSON file, - * so, not sure if there is a need for symlinks within the pmu-events - * directory. - * - * For now, treat symlinks of JSON files as regular files and create - * separate tables for each symlink (presumably, each symlink refers - * to specific version of the CPU). - */ - - maxfds = get_maxfds(); - rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); - if (rc) - goto err_processing_std_arch_event_dir; - - rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc) - goto err_processing_dir; - - sprintf(ldirname, "%s/test", start_dirname); - - rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); - if (rc) - goto err_processing_std_arch_event_dir; - - rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc) - goto err_processing_dir; - - if (close_table) - print_events_table_suffix(eventsfp); - - if (!mapfile) { - pr_info("%s: No CPU->JSON mapping?\n", prog); - empty_map = 1; - goto err_close_eventsfp; - } - - rc = process_mapfile(eventsfp, mapfile); - if (rc) { - pr_info("%s: Error processing mapfile %s\n", prog, mapfile); - /* Make build fail */ - ret = 1; - goto err_close_eventsfp; - } - - rc = process_system_event_tables(eventsfp); - fclose(eventsfp); - if (rc) { - ret = 1; - goto err_out; - } - - free_arch_std_events(); - free_sys_event_tables(); - free(mapfile); - return 0; - -err_processing_std_arch_event_dir: - err_string_ext = " for std arch event"; -err_processing_dir: - if (verbose) { - pr_info("%s: Error walking file tree %s%s\n", prog, ldirname, - err_string_ext); - empty_map = 1; - } else if (rc < 0) { - ret = 1; - } else { - empty_map = 1; - } -err_close_eventsfp: - fclose(eventsfp); - if (empty_map) - create_empty_mapping(output_file); -err_out: - free_arch_std_events(); - free_sys_event_tables(); - free(mapfile); - return ret; -} diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py new file mode 
100755 index 000000000000..83e0dcbeac9a --- /dev/null +++ b/tools/perf/pmu-events/jevents.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +"""Convert directories of JSON events to C code.""" +import argparse +import csv +import json +import os +import sys +from typing import Callable +from typing import Sequence + +# Global command line arguments. +_args = None +# List of event tables generated from "/sys" directories. +_sys_event_tables = [] +# Map from an event name to an architecture standard +# JsonEvent. Architecture standard events are in json files in the top +# f'{_args.starting_dir}/{_args.arch}' directory. +_arch_std_events = {} +# Track whether an events table is currently being defined and needs closing. +_close_table = False + + +def removesuffix(s: str, suffix: str) -> str: + """Remove the suffix from a string + + The removesuffix function is added to str in Python 3.9. We aim for 3.6 + compatibility and so provide our own function here. + """ + return s[0:-len(suffix)] if s.endswith(suffix) else s + + +def file_name_to_table_name(parents: Sequence[str], dirname: str) -> str: + """Generate a C table name from directory names.""" + tblname = 'pme' + for p in parents: + tblname += '_' + p + tblname += '_' + dirname + return tblname.replace('-', '_') + + +class JsonEvent: + """Representation of an event loaded from a json file dictionary.""" + + def __init__(self, jd: dict): + """Constructor passed the dictionary of parsed json values.""" + + def llx(x: int) -> str: + """Convert an int to a string similar to a printf modifier of %#llx.""" + return '0' if x == 0 else hex(x) + + def fixdesc(s: str) -> str: + """Fix formatting issue for the desc string.""" + if s is None: + return None + return removesuffix(removesuffix(removesuffix(s, '. '), + '. 
'), '.').replace('\n', '\\n').replace( + '\"', '\\"').replace('\r', '\\r') + + def convert_aggr_mode(aggr_mode: str) -> str: + """Returns the aggr_mode_class enum value associated with the JSON string.""" + if not aggr_mode: + return None + aggr_mode_to_enum = { + 'PerChip': '1', + 'PerCore': '2', + } + return aggr_mode_to_enum[aggr_mode] + + def lookup_msr(num: str) -> str: + """Converts the msr number, or first in a list to the appropriate event field.""" + if not num: + return None + msrmap = { + 0x3F6: 'ldlat=', + 0x1A6: 'offcore_rsp=', + 0x1A7: 'offcore_rsp=', + 0x3F7: 'frontend=', + } + return msrmap[int(num.split(',', 1)[0], 0)] + + def real_event(name: str, event: str) -> str: + """Convert well known event names to an event string otherwise use the event argument.""" + fixed = { + 'inst_retired.any': 'event=0xc0,period=2000003', + 'inst_retired.any_p': 'event=0xc0,period=2000003', + 'cpu_clk_unhalted.ref': 'event=0x0,umask=0x03,period=2000003', + 'cpu_clk_unhalted.thread': 'event=0x3c,period=2000003', + 'cpu_clk_unhalted.core': 'event=0x3c,period=2000003', + 'cpu_clk_unhalted.thread_any': 'event=0x3c,any=1,period=2000003', + } + if not name: + return None + if name.lower() in fixed: + return fixed[name.lower()] + return event + + def unit_to_pmu(unit: str) -> str: + """Convert a JSON Unit to Linux PMU name.""" + if not unit: + return None + # Comment brought over from jevents.c: + # it's not realistic to keep adding these, we need something more scalable ... 
+ table = { + 'CBO': 'uncore_cbox', + 'QPI LL': 'uncore_qpi', + 'SBO': 'uncore_sbox', + 'iMPH-U': 'uncore_arb', + 'CPU-M-CF': 'cpum_cf', + 'CPU-M-SF': 'cpum_sf', + 'UPI LL': 'uncore_upi', + 'hisi_sicl,cpa': 'hisi_sicl,cpa', + 'hisi_sccl,ddrc': 'hisi_sccl,ddrc', + 'hisi_sccl,hha': 'hisi_sccl,hha', + 'hisi_sccl,l3c': 'hisi_sccl,l3c', + 'imx8_ddr': 'imx8_ddr', + 'L3PMC': 'amd_l3', + 'DFPMC': 'amd_df', + 'cpu_core': 'cpu_core', + 'cpu_atom': 'cpu_atom', + } + return table[unit] if unit in table else f'uncore_{unit.lower()}' + + eventcode = 0 + if 'EventCode' in jd: + eventcode = int(jd['EventCode'].split(',', 1)[0], 0) + if 'ExtSel' in jd: + eventcode |= int(jd['ExtSel']) << 8 + configcode = int(jd['ConfigCode'], 0) if 'ConfigCode' in jd else None + self.name = jd['EventName'].lower() if 'EventName' in jd else None + self.compat = jd.get('Compat') + self.desc = fixdesc(jd.get('BriefDescription')) + self.long_desc = fixdesc(jd.get('PublicDescription')) + precise = jd.get('PEBS') + msr = lookup_msr(jd.get('MSRIndex')) + msrval = jd.get('MSRValue') + extra_desc = '' + if 'Data_LA' in jd: + extra_desc += ' Supports address when precise' + if 'Errata' in jd: + extra_desc += '.' 
+ if 'Errata' in jd: + extra_desc += ' Spec update: ' + jd['Errata'] + self.pmu = unit_to_pmu(jd.get('Unit')) + filter = jd.get('Filter') + self.unit = jd.get('ScaleUnit') + self.perpkg = jd.get('PerPkg') + self.aggr_mode = convert_aggr_mode(jd.get('AggregationMode')) + self.deprecated = jd.get('Deprecated') + self.metric_name = jd.get('MetricName') + self.metric_group = jd.get('MetricGroup') + self.metric_constraint = jd.get('MetricConstraint') + self.metric_expr = jd.get('MetricExpr') + if self.metric_expr: + self.metric_expr = self.metric_expr.replace('\\', '\\\\') + arch_std = jd.get('ArchStdEvent') + if precise and self.desc and not '(Precise Event)' in self.desc: + extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise ' + 'event)') + event = f'config={llx(configcode)}' if configcode is not None else f'event={llx(eventcode)}' + event_fields = [ + ('AnyThread', 'any='), + ('PortMask', 'ch_mask='), + ('CounterMask', 'cmask='), + ('EdgeDetect', 'edge='), + ('FCMask', 'fc_mask='), + ('Invert', 'inv='), + ('SampleAfterValue', 'period='), + ('UMask', 'umask='), + ] + for key, value in event_fields: + if key in jd and jd[key] != '0': + event += ',' + value + jd[key] + if filter: + event += f',{filter}' + if msr: + event += f',{msr}{msrval}' + if self.desc and extra_desc: + self.desc += extra_desc + if self.long_desc and extra_desc: + self.long_desc += extra_desc + if self.pmu: + if self.desc and not self.desc.endswith('. '): + self.desc += '. ' + self.desc = (self.desc if self.desc else '') + ('Unit: ' + self.pmu + ' ') + if arch_std and arch_std.lower() in _arch_std_events: + event = _arch_std_events[arch_std.lower()].event + # Copy from the architecture standard event to self for undefined fields. 
+ for attr, value in _arch_std_events[arch_std.lower()].__dict__.items(): + if hasattr(self, attr) and not getattr(self, attr): + setattr(self, attr, value) + + self.event = real_event(self.name, event) + + def __repr__(self) -> str: + """String representation primarily for debugging.""" + s = '{\n' + for attr, value in self.__dict__.items(): + if value: + s += f'\t{attr} = {value},\n' + return s + '}' + + def to_c_string(self, topic_local: str) -> str: + """Representation of the event as a C struct initializer.""" + + def attr_string(attr: str, value: str) -> str: + return '\t.%s = \"%s\",\n' % (attr, value) + + def str_if_present(self, attr: str) -> str: + if not getattr(self, attr): + return '' + return attr_string(attr, getattr(self, attr)) + + s = '{\n' + for attr in ['name', 'event']: + s += str_if_present(self, attr) + if self.desc is not None: + s += attr_string('desc', self.desc) + else: + s += attr_string('desc', '(null)') + s += str_if_present(self, 'compat') + s += f'\t.topic = "{topic_local}",\n' + for attr in [ + 'long_desc', 'pmu', 'unit', 'perpkg', 'aggr_mode', 'metric_expr', + 'metric_name', 'metric_group', 'deprecated', 'metric_constraint' + ]: + s += str_if_present(self, attr) + s += '},\n' + return s + + +def read_json_events(path: str) -> Sequence[JsonEvent]: + """Read json events from the specified file.""" + return json.load(open(path), object_hook=lambda d: JsonEvent(d)) + + +def preprocess_arch_std_files(archpath: str) -> None: + """Read in all architecture standard events.""" + global _arch_std_events + for item in os.scandir(archpath): + if item.is_file() and item.name.endswith('.json'): + for event in read_json_events(item.path): + if event.name: + _arch_std_events[event.name.lower()] = event + + +def print_events_table_prefix(tblname: str) -> None: + """Called when a new events table is started.""" + global _close_table + if _close_table: + raise IOError('Printing table prefix but last table has no suffix') + 
_args.output_file.write(f'static const struct pmu_event {tblname}[] = {{\n') + _close_table = True + + +def print_events_table_entries(item: os.DirEntry, topic: str) -> None: + """Create contents of an events table.""" + if not _close_table: + raise IOError('Table entries missing prefix') + for event in read_json_events(item.path): + _args.output_file.write(event.to_c_string(topic)) + + +def print_events_table_suffix() -> None: + """Optionally close events table.""" + global _close_table + if _close_table: + _args.output_file.write("""{ +\t.name = 0, +\t.event = 0, +\t.desc = 0, +}, +}; +""") + _close_table = False + + +def process_one_file(parents: Sequence[str], item: os.DirEntry) -> None: + """Process a JSON file during the main walk.""" + global _sys_event_tables + + def get_topic(topic: str) -> str: + return removesuffix(topic, '.json').replace('-', ' ') + + def is_leaf_dir(path: str) -> bool: + for item in os.scandir(path): + if item.is_dir(): + return False + return True + + # model directory, reset topic + if item.is_dir() and is_leaf_dir(item.path): + print_events_table_suffix() + + tblname = file_name_to_table_name(parents, item.name) + if item.name == 'sys': + _sys_event_tables.append(tblname) + print_events_table_prefix(tblname) + return + + # base dir or too deep + level = len(parents) + if level == 0 or level > 4: + return + + # Ignore other directories. If the file name does not have a .json + # extension, ignore it. It could be a readme.txt for instance. 
+ if not item.is_file() or not item.name.endswith('.json'): + return + + print_events_table_entries(item, get_topic(item.name)) + + +def print_mapping_table() -> None: + """Read the mapfile and generate the struct from cpuid string to event table.""" + with open(f'{_args.starting_dir}/{_args.arch}/mapfile.csv') as csvfile: + table = csv.reader(csvfile) + _args.output_file.write( + 'const struct pmu_events_map pmu_events_map[] = {\n') + first = True + for row in table: + # Skip the first row or any row beginning with #. + if not first and len(row) > 0 and not row[0].startswith('#'): + tblname = file_name_to_table_name([], row[2].replace('/', '_')) + _args.output_file.write("""{ +\t.cpuid = \"%s\", +\t.version = \"%s\", +\t.type = \"%s\", +\t.table = %s +}, +""" % (row[0].replace('\\', '\\\\'), row[1], row[3], tblname)) + first = False + + _args.output_file.write("""{ +\t.cpuid = "testcpu", +\t.version = "v1", +\t.type = "core", +\t.table = pme_test_soc_cpu, +}, +{ +\t.cpuid = 0, +\t.version = 0, +\t.type = 0, +\t.table = 0, +}, +}; +""") + + +def print_system_mapping_table() -> None: + """C struct mapping table array for tables from /sys directories.""" + _args.output_file.write( + '\nconst struct pmu_sys_events pmu_sys_event_tables[] = {\n') + for tblname in _sys_event_tables: + _args.output_file.write(f"""\t{{ +\t\t.table = {tblname}, +\t\t.name = \"{tblname}\", +\t}}, +""") + _args.output_file.write("""\t{ +\t\t.table = 0 +\t}, +}; +""") + + +def main() -> None: + global _args + + def dir_path(path: str) -> str: + """Validate path is a directory for argparse.""" + if os.path.isdir(path): + return path + raise argparse.ArgumentTypeError(f'\'{path}\' is not a valid directory') + + def ftw(path: str, parents: Sequence[str], + action: Callable[[Sequence[str], os.DirEntry], None]) -> None: + """Replicate the directory/file walking behavior of C's file tree walk.""" + for item in os.scandir(path): + action(parents, item) + if item.is_dir(): + ftw(item.path, parents + 
[item.name], action) + + ap = argparse.ArgumentParser() + ap.add_argument('arch', help='Architecture name like x86') + ap.add_argument( + 'starting_dir', + type=dir_path, + help='Root of tree containing architecture directories containing json files' + ) + ap.add_argument( + 'output_file', type=argparse.FileType('w'), nargs='?', default=sys.stdout) + _args = ap.parse_args() + + _args.output_file.write("#include \"pmu-events/pmu-events.h\"\n") + for path in [_args.arch, 'test']: + arch_path = f'{_args.starting_dir}/{path}' + if not os.path.isdir(arch_path): + raise IOError(f'Missing architecture directory in \'{arch_path}\'') + preprocess_arch_std_files(arch_path) + ftw(arch_path, [], process_one_file) + print_events_table_suffix() + + print_mapping_table() + print_system_mapping_table() + + +if __name__ == '__main__': + main() diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c deleted file mode 100644 index 831dc44c4558..000000000000 --- a/tools/perf/pmu-events/jsmn.c +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright (c) 2010 Serge A. Zaitsev - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * Slightly modified by AK to not assume 0 terminated input. - */ - -#include <stdlib.h> -#include "jsmn.h" -#define JSMN_STRICT - -/* - * Allocates a fresh unused token from the token pool. - */ -static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, - jsmntok_t *tokens, size_t num_tokens) -{ - jsmntok_t *tok; - - if ((unsigned)parser->toknext >= num_tokens) - return NULL; - tok = &tokens[parser->toknext++]; - tok->start = tok->end = -1; - tok->size = 0; - return tok; -} - -/* - * Fills token type and boundaries. - */ -static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, - int start, int end) -{ - token->type = type; - token->start = start; - token->end = end; - token->size = 0; -} - -/* - * Fills next available token with JSON primitive. - */ -static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, - size_t len, - jsmntok_t *tokens, size_t num_tokens) -{ - jsmntok_t *token; - int start; - - start = parser->pos; - - for (; parser->pos < len; parser->pos++) { - switch (js[parser->pos]) { -#ifndef JSMN_STRICT - /* - * In strict mode primitive must be followed by "," - * or "}" or "]" - */ - case ':': -#endif - case '\t': - case '\r': - case '\n': - case ' ': - case ',': - case ']': - case '}': - goto found; - default: - break; - } - if (js[parser->pos] < 32 || js[parser->pos] >= 127) { - parser->pos = start; - return JSMN_ERROR_INVAL; - } - } -#ifdef JSMN_STRICT - /* - * In strict mode primitive must be followed by a - * comma/object/array. 
- */ - parser->pos = start; - return JSMN_ERROR_PART; -#endif - -found: - token = jsmn_alloc_token(parser, tokens, num_tokens); - if (token == NULL) { - parser->pos = start; - return JSMN_ERROR_NOMEM; - } - jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); - parser->pos--; /* parent sees closing brackets */ - return JSMN_SUCCESS; -} - -/* - * Fills next token with JSON string. - */ -static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js, - size_t len, - jsmntok_t *tokens, size_t num_tokens) -{ - jsmntok_t *token; - int start = parser->pos; - - /* Skip starting quote */ - parser->pos++; - - for (; parser->pos < len; parser->pos++) { - char c = js[parser->pos]; - - /* Quote: end of string */ - if (c == '\"') { - token = jsmn_alloc_token(parser, tokens, num_tokens); - if (token == NULL) { - parser->pos = start; - return JSMN_ERROR_NOMEM; - } - jsmn_fill_token(token, JSMN_STRING, start+1, - parser->pos); - return JSMN_SUCCESS; - } - - /* Backslash: Quoted symbol expected */ - if (c == '\\') { - parser->pos++; - switch (js[parser->pos]) { - /* Allowed escaped symbols */ - case '\"': - case '/': - case '\\': - case 'b': - case 'f': - case 'r': - case 'n': - case 't': - break; - /* Allows escaped symbol \uXXXX */ - case 'u': - /* TODO */ - break; - /* Unexpected symbol */ - default: - parser->pos = start; - return JSMN_ERROR_INVAL; - } - } - } - parser->pos = start; - return JSMN_ERROR_PART; -} - -/* - * Parse JSON string and fill tokens. - */ -jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, - jsmntok_t *tokens, unsigned int num_tokens) -{ - jsmnerr_t r; - int i; - jsmntok_t *token; -#ifdef JSMN_STRICT - /* - * Keeps track of whether a new object/list/primitive is expected. New items are only - * allowed after an opening brace, comma or colon. A closing brace after a comma is not - * valid JSON. 
- */ - int expecting_item = 1; -#endif - - for (; parser->pos < len; parser->pos++) { - char c; - jsmntype_t type; - - c = js[parser->pos]; - switch (c) { - case '{': - case '[': -#ifdef JSMN_STRICT - if (!expecting_item) - return JSMN_ERROR_INVAL; -#endif - token = jsmn_alloc_token(parser, tokens, num_tokens); - if (token == NULL) - return JSMN_ERROR_NOMEM; - if (parser->toksuper != -1) - tokens[parser->toksuper].size++; - token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); - token->start = parser->pos; - parser->toksuper = parser->toknext - 1; - break; - case '}': - case ']': -#ifdef JSMN_STRICT - if (expecting_item) - return JSMN_ERROR_INVAL; -#endif - type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); - for (i = parser->toknext - 1; i >= 0; i--) { - token = &tokens[i]; - if (token->start != -1 && token->end == -1) { - if (token->type != type) - return JSMN_ERROR_INVAL; - parser->toksuper = -1; - token->end = parser->pos + 1; - break; - } - } - /* Error if unmatched closing bracket */ - if (i == -1) - return JSMN_ERROR_INVAL; - for (; i >= 0; i--) { - token = &tokens[i]; - if (token->start != -1 && token->end == -1) { - parser->toksuper = i; - break; - } - } - break; - case '\"': -#ifdef JSMN_STRICT - if (!expecting_item) - return JSMN_ERROR_INVAL; - expecting_item = 0; -#endif - r = jsmn_parse_string(parser, js, len, tokens, - num_tokens); - if (r < 0) - return r; - if (parser->toksuper != -1) - tokens[parser->toksuper].size++; - break; - case '\t': - case '\r': - case '\n': - case ' ': - break; -#ifdef JSMN_STRICT - case ':': - case ',': - if (expecting_item) - return JSMN_ERROR_INVAL; - expecting_item = 1; - break; - /* - * In strict mode primitives are: - * numbers and booleans. - */ - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case 't': - case 'f': - case 'n': -#else - case ':': - case ',': - break; - /* - * In non-strict mode every unquoted value - * is a primitive. 
- */ - /*FALL THROUGH */ - default: -#endif - -#ifdef JSMN_STRICT - if (!expecting_item) - return JSMN_ERROR_INVAL; - expecting_item = 0; -#endif - r = jsmn_parse_primitive(parser, js, len, tokens, - num_tokens); - if (r < 0) - return r; - if (parser->toksuper != -1) - tokens[parser->toksuper].size++; - break; - -#ifdef JSMN_STRICT - /* Unexpected char in strict mode */ - default: - return JSMN_ERROR_INVAL; -#endif - } - } - - for (i = parser->toknext - 1; i >= 0; i--) { - /* Unmatched opened object or array */ - if (tokens[i].start != -1 && tokens[i].end == -1) - return JSMN_ERROR_PART; - } - -#ifdef JSMN_STRICT - return expecting_item ? JSMN_ERROR_INVAL : JSMN_SUCCESS; -#else - return JSMN_SUCCESS; -#endif -} - -/* - * Creates a new parser based over a given buffer with an array of tokens - * available. - */ -void jsmn_init(jsmn_parser *parser) -{ - parser->pos = 0; - parser->toknext = 0; - parser->toksuper = -1; -} - -const char *jsmn_strerror(jsmnerr_t err) -{ - switch (err) { - case JSMN_ERROR_NOMEM: - return "No enough tokens"; - case JSMN_ERROR_INVAL: - return "Invalid character inside JSON string"; - case JSMN_ERROR_PART: - return "The string is not a full JSON packet, more bytes expected"; - case JSMN_SUCCESS: - return "Success"; - default: - return "Unknown json error"; - } -} diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h deleted file mode 100644 index 1bdfd55fff30..000000000000 --- a/tools/perf/pmu-events/jsmn.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: MIT */ -#ifndef __JSMN_H_ -#define __JSMN_H_ - -/* - * JSON type identifier. 
Basic types are: - * o Object - * o Array - * o String - * o Other primitive: number, boolean (true/false) or null - */ -typedef enum { - JSMN_PRIMITIVE = 0, - JSMN_OBJECT = 1, - JSMN_ARRAY = 2, - JSMN_STRING = 3 -} jsmntype_t; - -typedef enum { - /* Not enough tokens were provided */ - JSMN_ERROR_NOMEM = -1, - /* Invalid character inside JSON string */ - JSMN_ERROR_INVAL = -2, - /* The string is not a full JSON packet, more bytes expected */ - JSMN_ERROR_PART = -3, - /* Everything was fine */ - JSMN_SUCCESS = 0 -} jsmnerr_t; - -/* - * JSON token description. - * @param type type (object, array, string etc.) - * @param start start position in JSON data string - * @param end end position in JSON data string - */ -typedef struct { - jsmntype_t type; - int start; - int end; - int size; -} jsmntok_t; - -/* - * JSON parser. Contains an array of token blocks available. Also stores - * the string being parsed now and current position in that string - */ -typedef struct { - unsigned int pos; /* offset in the JSON string */ - int toknext; /* next token to allocate */ - int toksuper; /* superior token node, e.g parent object or array */ -} jsmn_parser; - -/* - * Create JSON parser over an array of tokens - */ -void jsmn_init(jsmn_parser *parser); - -/* - * Run JSON parser. It parses a JSON data string into and array of tokens, - * each describing a single JSON object. - */ -jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, - size_t len, - jsmntok_t *tokens, unsigned int num_tokens); - -const char *jsmn_strerror(jsmnerr_t err); - -#endif /* __JSMN_H_ */ diff --git a/tools/perf/pmu-events/json.c b/tools/perf/pmu-events/json.c deleted file mode 100644 index 0544398d6e2d..000000000000 --- a/tools/perf/pmu-events/json.c +++ /dev/null @@ -1,162 +0,0 @@ -/* Parse JSON files using the JSMN parser. */ - -/* - * Copyright (c) 2014, Intel Corporation - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, - * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <stdio.h> -#include <errno.h> -#include <unistd.h> -#include "jsmn.h" -#include "json.h" -#include <linux/kernel.h> - - -static char *mapfile(const char *fn, size_t *size) -{ - unsigned ps = sysconf(_SC_PAGESIZE); - struct stat st; - char *map = NULL; - int err; - int fd = open(fn, O_RDONLY); - - if (fd < 0 && verbose > 0 && fn) { - pr_err("Error opening events file '%s': %s\n", fn, - strerror(errno)); - } - - if (fd < 0) - return NULL; - err = fstat(fd, &st); - if (err < 0) - goto out; - *size = st.st_size; - map = mmap(NULL, - (st.st_size + ps - 1) & ~(ps - 1), - PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); - if (map == MAP_FAILED) - map = NULL; -out: - close(fd); - return map; -} - -static void unmapfile(char *map, size_t size) -{ - unsigned ps = sysconf(_SC_PAGESIZE); - munmap(map, roundup(size, ps)); -} - -/* - * Parse json file using jsmn. Return array of tokens, - * and mapped file. Caller needs to free array. 
- */ -jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len) -{ - jsmn_parser parser; - jsmntok_t *tokens; - jsmnerr_t res; - unsigned sz; - - *map = mapfile(fn, size); - if (!*map) - return NULL; - /* Heuristic */ - sz = *size * 16; - tokens = malloc(sz); - if (!tokens) - goto error; - jsmn_init(&parser); - res = jsmn_parse(&parser, *map, *size, tokens, - sz / sizeof(jsmntok_t)); - if (res != JSMN_SUCCESS) { - pr_err("%s: json error %s\n", fn, jsmn_strerror(res)); - goto error_free; - } - if (len) - *len = parser.toknext; - return tokens; -error_free: - free(tokens); -error: - unmapfile(*map, *size); - return NULL; -} - -void free_json(char *map, size_t size, jsmntok_t *tokens) -{ - free(tokens); - unmapfile(map, size); -} - -static int countchar(char *map, char c, int end) -{ - int i; - int count = 0; - for (i = 0; i < end; i++) - if (map[i] == c) - count++; - return count; -} - -/* Return line number of a jsmn token */ -int json_line(char *map, jsmntok_t *t) -{ - return countchar(map, '\n', t->start) + 1; -} - -static const char * const jsmn_types[] = { - [JSMN_PRIMITIVE] = "primitive", - [JSMN_ARRAY] = "array", - [JSMN_OBJECT] = "object", - [JSMN_STRING] = "string" -}; - -#define LOOKUP(a, i) ((i) < (sizeof(a)/sizeof(*(a))) ? ((a)[i]) : "?") - -/* Return type name of a jsmn token */ -const char *json_name(jsmntok_t *t) -{ - return LOOKUP(jsmn_types, t->type); -} - -int json_len(jsmntok_t *t) -{ - return t->end - t->start; -} - -/* Is string t equal to s? 
*/ -int json_streq(char *map, jsmntok_t *t, const char *s) -{ - unsigned len = json_len(t); - return len == strlen(s) && !strncasecmp(map + t->start, s, len); -} diff --git a/tools/perf/pmu-events/json.h b/tools/perf/pmu-events/json.h deleted file mode 100644 index fbcd5a0590ad..000000000000 --- a/tools/perf/pmu-events/json.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef JSON_H -#define JSON_H 1 - -#include "jsmn.h" - -jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len); -void free_json(char *map, size_t size, jsmntok_t *tokens); -int json_line(char *map, jsmntok_t *t); -const char *json_name(jsmntok_t *t); -int json_streq(char *map, jsmntok_t *t, const char *s); -int json_len(jsmntok_t *t); - -extern int verbose; - -#include <stdbool.h> - -extern int eprintf(int level, int var, const char *fmt, ...); -#define pr_fmt(fmt) fmt - -#define pr_err(fmt, ...) \ - eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__) - -#define pr_info(fmt, ...) \ - eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__) - -#define pr_debug(fmt, ...) 
\ - eprintf(2, verbose, pr_fmt(fmt), ##__VA_ARGS__) - -#ifndef roundup -#define roundup(x, y) ( \ -{ \ - const typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -#endif - -#endif diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index f13368569d8b..478b33825790 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -1115,6 +1115,7 @@ static int test__parsing_fake(struct test_suite *test __maybe_unused, break; if (!pe->metric_expr) continue; + pr_debug("Found metric '%s' for '%s'\n", pe->metric_name, map->cpuid); err = metric_parse_fake(pe->metric_expr); if (err) return err; diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh index e7c59e5a7a98..6e79349e42be 100755 --- a/tools/perf/tests/shell/stat_all_metrics.sh +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -1,26 +1,41 @@ -#!/bin/sh +#!/bin/bash # perf all metrics test # SPDX-License-Identifier: GPL-2.0 -set -e - err=0 for m in $(perf list --raw-dump metrics); do echo "Testing $m" result=$(perf stat -M "$m" true 2>&1) - if [[ ! "$result" =~ "$m" ]] && [[ ! "$result" =~ "<not supported>" ]]; then - # We failed to see the metric and the events are support. Possibly the - # workload was too small so retry with something longer. - result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) - if [[ ! "$result" =~ "$m" ]]; then - echo "Metric '$m' not printed in:" - echo "$result" - if [[ "$result" =~ "FP_ARITH" && "$err" != "1" ]]; then - echo "Skip, not fail, for FP issues" - err=2 - else - err=1 - fi + if [[ "$result" =~ "${m:0:50}" ]] || [[ "$result" =~ "<not supported>" ]] + then + continue + fi + # Failed so try system wide. + result=$(perf stat -M "$m" -a true 2>&1) + if [[ "$result" =~ "${m:0:50}" ]] + then + continue + fi + # Failed again, possibly the workload was too small so retry with something + # longer. 
+ result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) + if [[ "$result" =~ "${m:0:50}" ]] + then + continue + fi + echo "Metric '$m' not printed in:" + echo "$result" + if [[ "$err" != "1" ]] + then + err=2 + if [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]] + then + echo "Skip, not fail, for FP issues" + elif [[ "$result" =~ "PMM" ]] + then + echo "Skip, not fail, for Optane memory issues" + else + err=1 fi fi done diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh new file mode 100755 index 000000000000..113ccd17bf03 --- /dev/null +++ b/tools/perf/tests/shell/test_brstack.sh @@ -0,0 +1,114 @@ +#!/bin/sh +# Check branch stack sampling + +# SPDX-License-Identifier: GPL-2.0 +# German Gomez <german.gomez@arm.com>, 2022 + +# we need a C compiler to build the test programs +# so bail if none is found +if ! [ -x "$(command -v cc)" ]; then + echo "failed: no compiler, install gcc" + exit 2 +fi + +# skip the test if the hardware doesn't support branch stack sampling +perf record -b -o- -e dummy -B true > /dev/null 2>&1 || exit 2 + +TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX) + +cleanup() { + rm -rf $TMPDIR +} + +trap cleanup exit term int + +gen_test_program() { + # generate test program + cat << EOF > $1 +#define BENCH_RUNS 999999 +int cnt; +void bar(void) { +} /* return */ +void foo(void) { + bar(); /* call */ +} /* return */ +void bench(void) { + void (*foo_ind)(void) = foo; + if ((cnt++) % 3) /* branch (cond) */ + foo(); /* call */ + bar(); /* call */ + foo_ind(); /* call (ind) */ +} +int main(void) +{ + int cnt = 0; + while (1) { + if ((cnt++) > BENCH_RUNS) + break; + bench(); /* call */ + } /* branch (uncond) */ + return 0; +} +EOF +} + +test_user_branches() { + echo "Testing user branch stack sampling" + + gen_test_program "$TMPDIR/program.c" + cc -fno-inline -g "$TMPDIR/program.c" -o $TMPDIR/a.out + + perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- $TMPDIR/a.out > /dev/null 
2>&1 + perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script + + # example of branch entries: + # foo+0x14/bar+0x40/P/-/-/0/CALL + + set -x + egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/IND_CALL$" $TMPDIR/perf.script + egrep -m1 "^foo\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^bench\+[^ ]*/bar\+[^ ]*/CALL$" $TMPDIR/perf.script + egrep -m1 "^bar\+[^ ]*/foo\+[^ ]*/RET$" $TMPDIR/perf.script + egrep -m1 "^foo\+[^ ]*/bench\+[^ ]*/RET$" $TMPDIR/perf.script + egrep -m1 "^bench\+[^ ]*/bench\+[^ ]*/COND$" $TMPDIR/perf.script + egrep -m1 "^main\+[^ ]*/main\+[^ ]*/UNCOND$" $TMPDIR/perf.script + set +x + + # some branch types are still not being tested: + # IND COND_CALL COND_RET SYSCALL SYSRET IRQ SERROR NO_TX +} + +# first argument <arg0> is the argument passed to "--branch-filter <arg0>,save_type,u" +# second argument are the expected branch types for the given filter +test_filter() { + local filter=$1 + local expect=$2 + + echo "Testing branch stack filtering permutation ($filter,$expect)" + + gen_test_program "$TMPDIR/program.c" + cc -fno-inline -g "$TMPDIR/program.c" -o $TMPDIR/a.out + + perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1 + perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script + + # fail if we find any branch type that doesn't match any of the expected ones + # also consider UNKNOWN branch types (-) + if egrep -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then + return 1 + fi +} + +set -e + +test_user_branches + +test_filter "any_call" "CALL|IND_CALL|COND_CALL|SYSCALL|IRQ" +test_filter "call" "CALL|SYSCALL" +test_filter "cond" "COND" +test_filter "any_ret" "RET|COND_RET|SYSRET|ERET" + +test_filter "call,cond" "CALL|SYSCALL|COND" +test_filter "any_call,cond" "CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND" +test_filter "cond,any_call,any_ret" 
"COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET" diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index d19d765195c5..238305868644 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -18,6 +18,7 @@ #include "pmu-events/pmu-events.h" static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; +static bool zen4_ibs_extensions; static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) { @@ -39,6 +40,7 @@ static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) }; const char *ic_miss_str = NULL; const char *l1tlb_pgsz_str = NULL; + char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = ""; if (cpu_family == 0x19 && cpu_model < 0x10) { /* @@ -53,12 +55,19 @@ static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) ic_miss_str = ic_miss_strs[reg.ic_miss]; } + if (zen4_ibs_extensions) { + snprintf(l3_miss_str, sizeof(l3_miss_str), + " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d", + reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss); + } + printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " - "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n", + "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n", reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, - reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : ""); + reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? 
" L2Miss 1" : " L2Miss 0") : "", + l3_miss_str); } static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) @@ -68,9 +77,15 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) static void pr_ibs_op_ctl(union ibs_op_ctl reg) { - printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n", - reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val, - reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); + char l3_miss_only[sizeof(" L3MissOnly _")] = ""; + + if (zen4_ibs_extensions) + snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); + + printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n", + reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, + reg.op_en, reg.op_val, reg.cnt_ctl, + reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); } static void pr_ibs_op_data(union ibs_op_data reg) @@ -84,7 +99,34 @@ static void pr_ibs_op_data(union ibs_op_data reg) reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); } -static void pr_ibs_op_data2(union ibs_op_data2 reg) +static void pr_ibs_op_data2_extended(union ibs_op_data2 reg) +{ + static const char * const data_src_str[] = { + "", + " DataSrc 1=Local L3 or other L1/L2 in CCX", + " DataSrc 2=A peer cache in a near CCX", + " DataSrc 3=Data returned from DRAM", + " DataSrc 4=(reserved)", + " DataSrc 5=A peer cache in a far CCX", + " DataSrc 6=DRAM address map with \"long latency\" bit set", + " DataSrc 7=Data returned from MMIO/Config/PCI/APIC", + " DataSrc 8=Extension Memory (S-Link, GenZ, etc)", + " DataSrc 9=(reserved)", + " DataSrc 10=(reserved)", + " DataSrc 11=(reserved)", + " DataSrc 12=Peer Agent Memory", + /* 13 to 31 are reserved. Avoid printing them. */ + }; + int data_src = (reg.data_src_hi << 3) | reg.data_src_lo; + + printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, + (data_src == 1 || data_src == 2 || data_src == 5) ? + (reg.cache_hit_st ? 
"CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "", + reg.rmt_node, + data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : ""); +} + +static void pr_ibs_op_data2_default(union ibs_op_data2 reg) { static const char * const data_src_str[] = { "", @@ -98,9 +140,16 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg) }; printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, - reg.data_src == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " + reg.data_src_lo == 2 ? (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "", - reg.rmt_node, data_src_str[reg.data_src]); + reg.rmt_node, data_src_str[reg.data_src_lo]); +} + +static void pr_ibs_op_data2(union ibs_op_data2 reg) +{ + if (zen4_ibs_extensions) + return pr_ibs_op_data2_extended(reg); + pr_ibs_op_data2_default(reg); } static void pr_ibs_op_data3(union ibs_op_data3 reg) @@ -279,6 +328,9 @@ bool evlist__has_amd_ibs(struct evlist *evlist) pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; } + if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) + zen4_ibs_extensions = 1; + if (ibs_fetch_type || ibs_op_type) { if (!cpu_family) parse_cpuid(env); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 5c27a4b2e7a7..7e663673f79f 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -31,6 +31,7 @@ #include "callchain.h" #include "branch.h" #include "symbol.h" +#include "util.h" #include "../perf.h" #define CALLCHAIN_PARAM_DEFAULT \ @@ -266,12 +267,17 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) do { /* Framepointer style */ if (!strncmp(name, "fp", sizeof("fp"))) { - if (!strtok_r(NULL, ",", &saveptr)) { - param->record_mode = CALLCHAIN_FP; - ret = 0; - } else - pr_err("callchain: No more arguments " - "needed for --call-graph fp\n"); + ret = 0; + param->record_mode = CALLCHAIN_FP; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size; + + size = strtoul(tok, &name, 0); 
+ if (size < (unsigned) sysctl__max_stack()) + param->max_stack = size; + } break; /* Dwarf style */ diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index f1ab6edba446..613d6ae82663 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -149,6 +149,7 @@ static int process_sample_event(struct perf_tool *tool, struct convert_json *c = container_of(tool, struct convert_json, tool); FILE *out = c->out; struct addr_location al, tal; + u64 sample_type = __evlist__combined_sample_type(evsel->evlist); u8 cpumode = PERF_RECORD_MISC_USER; if (machine__resolve(machine, &al, sample) < 0) { @@ -168,7 +169,9 @@ static int process_sample_event(struct perf_tool *tool, output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_); output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid); - if (al.thread->cpu >= 0) + if ((sample_type & PERF_SAMPLE_CPU)) + output_json_key_format(out, true, 3, "cpu", "%i", sample->cpu); + else if (al.thread->cpu >= 0) output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu); output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread)); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 579e44c59914..5b8cf6a421a4 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -179,7 +179,7 @@ static void perf_env__purge_bpf(struct perf_env *env __maybe_unused) void perf_env__exit(struct perf_env *env) { - int i; + int i, j; perf_env__purge_bpf(env); perf_env__purge_cgroups(env); @@ -196,6 +196,8 @@ void perf_env__exit(struct perf_env *env) zfree(&env->sibling_threads); zfree(&env->pmu_mappings); zfree(&env->cpu); + for (i = 0; i < env->nr_cpu_pmu_caps; i++) + zfree(&env->cpu_pmu_caps[i]); zfree(&env->cpu_pmu_caps); zfree(&env->numa_map); @@ -217,11 +219,13 @@ void perf_env__exit(struct perf_env *env) } zfree(&env->hybrid_nodes); - for (i = 0; i < env->nr_hybrid_cpc_nodes; i++) { - 
zfree(&env->hybrid_cpc_nodes[i].cpu_pmu_caps); - zfree(&env->hybrid_cpc_nodes[i].pmu_name); + for (i = 0; i < env->nr_pmus_with_caps; i++) { + for (j = 0; j < env->pmu_caps[i].nr_caps; j++) + zfree(&env->pmu_caps[i].caps[j]); + zfree(&env->pmu_caps[i].caps); + zfree(&env->pmu_caps[i].pmu_name); } - zfree(&env->hybrid_cpc_nodes); + zfree(&env->pmu_caps); } void perf_env__init(struct perf_env *env) @@ -527,3 +531,51 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu) return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1; } + +char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, + const char *cap) +{ + char *cap_eq; + int cap_size; + char **ptr; + int i, j; + + if (!pmu_name || !cap) + return NULL; + + cap_size = strlen(cap); + cap_eq = zalloc(cap_size + 2); + if (!cap_eq) + return NULL; + + memcpy(cap_eq, cap, cap_size); + cap_eq[cap_size] = '='; + + if (!strcmp(pmu_name, "cpu")) { + for (i = 0; i < env->nr_cpu_pmu_caps; i++) { + if (!strncmp(env->cpu_pmu_caps[i], cap_eq, cap_size + 1)) { + free(cap_eq); + return &env->cpu_pmu_caps[i][cap_size + 1]; + } + } + goto out; + } + + for (i = 0; i < env->nr_pmus_with_caps; i++) { + if (strcmp(env->pmu_caps[i].pmu_name, pmu_name)) + continue; + + ptr = env->pmu_caps[i].caps; + + for (j = 0; j < env->pmu_caps[i].nr_caps; j++) { + if (!strncmp(ptr[j], cap_eq, cap_size + 1)) { + free(cap_eq); + return &ptr[j][cap_size + 1]; + } + } + } + +out: + free(cap_eq); + return NULL; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index a3541f98e1fc..4566c51f2fd9 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -43,10 +43,10 @@ struct hybrid_node { char *cpus; }; -struct hybrid_cpc_node { - int nr_cpu_pmu_caps; +struct pmu_caps { + int nr_caps; unsigned int max_branches; - char *cpu_pmu_caps; + char **caps; char *pmu_name; }; @@ -74,14 +74,14 @@ struct perf_env { int nr_groups; int nr_cpu_pmu_caps; int nr_hybrid_nodes; - int 
nr_hybrid_cpc_nodes; + int nr_pmus_with_caps; char *cmdline; const char **cmdline_argv; char *sibling_cores; char *sibling_dies; char *sibling_threads; char *pmu_mappings; - char *cpu_pmu_caps; + char **cpu_pmu_caps; struct cpu_topology_map *cpu; struct cpu_cache_level *caches; int caches_cnt; @@ -94,7 +94,7 @@ struct perf_env { struct memory_node *memory_nodes; unsigned long long memory_bsize; struct hybrid_node *hybrid_nodes; - struct hybrid_cpc_node *hybrid_cpc_nodes; + struct pmu_caps *pmu_caps; #ifdef HAVE_LIBBPF_SUPPORT /* * bpf_info_lock protects bpf rbtrees. This is needed because the @@ -172,4 +172,6 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu); +char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name, + const char *cap); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 0476bb3a4188..1fa14598b916 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -76,6 +76,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_TIME_CONV] = "TIME_CONV", [PERF_RECORD_HEADER_FEATURE] = "FEATURE", [PERF_RECORD_COMPRESSED] = "COMPRESSED", + [PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT", }; const char *perf_event__name(unsigned int id) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 094b0a9c0bc0..a67cc3f2fa74 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1092,6 +1092,11 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un { } +void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ +} + static void evsel__set_default_freq_period(struct record_opts *opts, struct perf_event_attr *attr) { @@ -1375,6 +1380,8 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if 
(evsel__is_offcpu_event(evsel)) evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES; + + arch__post_evsel_config(evsel, attr); } int evsel__set_filter(struct evsel *evsel, const char *filter) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 73ea48e94079..92bed8e2f7d8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -297,6 +297,7 @@ void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); +void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 6ad629db63b7..c30c29c51410 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1512,18 +1512,13 @@ static int write_compressed(struct feat_fd *ff __maybe_unused, return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); } -static int write_per_cpu_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu, - bool write_pmu) +static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu, + bool write_pmu) { struct perf_pmu_caps *caps = NULL; - int nr_caps; int ret; - nr_caps = perf_pmu__caps_parse(pmu); - if (nr_caps < 0) - return nr_caps; - - ret = do_write(ff, &nr_caps, sizeof(nr_caps)); + ret = do_write(ff, &pmu->nr_caps, sizeof(pmu->nr_caps)); if (ret < 0) return ret; @@ -1550,33 +1545,60 @@ static int write_cpu_pmu_caps(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); + int ret; if (!cpu_pmu) return -ENOENT; - return write_per_cpu_pmu_caps(ff, cpu_pmu, false); + ret = perf_pmu__caps_parse(cpu_pmu); + if (ret < 0) + return ret; + + return __write_pmu_caps(ff, cpu_pmu, false); } -static int 
write_hybrid_cpu_pmu_caps(struct feat_fd *ff, - struct evlist *evlist __maybe_unused) +static int write_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) { - struct perf_pmu *pmu; - u32 nr_pmu = perf_pmu__hybrid_pmu_num(); + struct perf_pmu *pmu = NULL; + int nr_pmu = 0; int ret; - if (nr_pmu == 0) - return -ENOENT; + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name || !strcmp(pmu->name, "cpu") || + perf_pmu__caps_parse(pmu) <= 0) + continue; + nr_pmu++; + } ret = do_write(ff, &nr_pmu, sizeof(nr_pmu)); if (ret < 0) return ret; + if (!nr_pmu) + return 0; + + /* + * Write hybrid pmu caps first to maintain compatibility with + * older perf tool. + */ + pmu = NULL; perf_pmu__for_each_hybrid_pmu(pmu) { - ret = write_per_cpu_pmu_caps(ff, pmu, true); + ret = __write_pmu_caps(ff, pmu, true); if (ret < 0) return ret; } + pmu = NULL; + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name || !strcmp(pmu->name, "cpu") || + !pmu->nr_caps || perf_pmu__is_hybrid(pmu->name)) + continue; + + ret = __write_pmu_caps(ff, pmu, true); + if (ret < 0) + return ret; + } return 0; } @@ -2051,32 +2073,20 @@ static void print_compressed(struct feat_fd *ff, FILE *fp) ff->ph->env.comp_level, ff->ph->env.comp_ratio); } -static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char *cpu_pmu_caps, - char *pmu_name) +static void __print_pmu_caps(FILE *fp, int nr_caps, char **caps, char *pmu_name) { - const char *delimiter; - char *str, buf[128]; + const char *delimiter = ""; + int i; if (!nr_caps) { - if (!pmu_name) - fprintf(fp, "# cpu pmu capabilities: not available\n"); - else - fprintf(fp, "# %s pmu capabilities: not available\n", pmu_name); + fprintf(fp, "# %s pmu capabilities: not available\n", pmu_name); return; } - if (!pmu_name) - scnprintf(buf, sizeof(buf), "# cpu pmu capabilities: "); - else - scnprintf(buf, sizeof(buf), "# %s pmu capabilities: ", pmu_name); - - delimiter = buf; - - str = cpu_pmu_caps; - while (nr_caps--) { - fprintf(fp, "%s%s", delimiter, 
str); + fprintf(fp, "# %s pmu capabilities: ", pmu_name); + for (i = 0; i < nr_caps; i++) { + fprintf(fp, "%s%s", delimiter, caps[i]); delimiter = ", "; - str += strlen(str) + 1; } fprintf(fp, "\n"); @@ -2084,19 +2094,18 @@ static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char *cpu_pmu_caps, static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) { - print_per_cpu_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps, - ff->ph->env.cpu_pmu_caps, NULL); + __print_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps, + ff->ph->env.cpu_pmu_caps, (char *)"cpu"); } -static void print_hybrid_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +static void print_pmu_caps(struct feat_fd *ff, FILE *fp) { - struct hybrid_cpc_node *n; + struct pmu_caps *pmu_caps; - for (int i = 0; i < ff->ph->env.nr_hybrid_cpc_nodes; i++) { - n = &ff->ph->env.hybrid_cpc_nodes[i]; - print_per_cpu_pmu_caps(fp, n->nr_cpu_pmu_caps, - n->cpu_pmu_caps, - n->pmu_name); + for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) { + pmu_caps = &ff->ph->env.pmu_caps[i]; + __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps, + pmu_caps->pmu_name); } } @@ -3207,28 +3216,26 @@ static int process_compressed(struct feat_fd *ff, return 0; } -static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps, - char **cpu_pmu_caps, - unsigned int *max_branches) +static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps, + char ***caps, unsigned int *max_branches) { - char *name, *value; - struct strbuf sb; - u32 nr_caps; + char *name, *value, *ptr; + u32 nr_pmu_caps, i; + + *nr_caps = 0; + *caps = NULL; - if (do_read_u32(ff, &nr_caps)) + if (do_read_u32(ff, &nr_pmu_caps)) return -1; - if (!nr_caps) { - pr_debug("cpu pmu capabilities not available\n"); + if (!nr_pmu_caps) return 0; - } - - *nr_cpu_pmu_caps = nr_caps; - if (strbuf_init(&sb, 128) < 0) + *caps = zalloc(sizeof(char *) * nr_pmu_caps); + if (!*caps) return -1; - while (nr_caps--) { + for (i = 0; i < nr_pmu_caps; i++) { name = do_read_string(ff); if 
(!name) goto error; @@ -3237,12 +3244,10 @@ static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps, if (!value) goto free_name; - if (strbuf_addf(&sb, "%s=%s", name, value) < 0) + if (asprintf(&ptr, "%s=%s", name, value) < 0) goto free_value; - /* include a NULL character at the end */ - if (strbuf_add(&sb, "", 1) < 0) - goto free_value; + (*caps)[i] = ptr; if (!strcmp(name, "branches")) *max_branches = atoi(value); @@ -3250,7 +3255,7 @@ static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps, free(value); free(name); } - *cpu_pmu_caps = strbuf_detach(&sb, NULL); + *nr_caps = nr_pmu_caps; return 0; free_value: @@ -3258,64 +3263,76 @@ free_value: free_name: free(name); error: - strbuf_release(&sb); + for (; i > 0; i--) + free((*caps)[i - 1]); + free(*caps); + *caps = NULL; + *nr_caps = 0; return -1; } static int process_cpu_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { - return process_per_cpu_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, - &ff->ph->env.cpu_pmu_caps, - &ff->ph->env.max_branches); + int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, + &ff->ph->env.cpu_pmu_caps, + &ff->ph->env.max_branches); + + if (!ret && !ff->ph->env.cpu_pmu_caps) + pr_debug("cpu pmu capabilities not available\n"); + return ret; } -static int process_hybrid_cpu_pmu_caps(struct feat_fd *ff, - void *data __maybe_unused) +static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused) { - struct hybrid_cpc_node *nodes; + struct pmu_caps *pmu_caps; u32 nr_pmu, i; int ret; + int j; if (do_read_u32(ff, &nr_pmu)) return -1; if (!nr_pmu) { - pr_debug("hybrid cpu pmu capabilities not available\n"); + pr_debug("pmu capabilities not available\n"); return 0; } - nodes = zalloc(sizeof(*nodes) * nr_pmu); - if (!nodes) + pmu_caps = zalloc(sizeof(*pmu_caps) * nr_pmu); + if (!pmu_caps) return -ENOMEM; for (i = 0; i < nr_pmu; i++) { - struct hybrid_cpc_node *n = &nodes[i]; - - ret = process_per_cpu_pmu_caps(ff, 
&n->nr_cpu_pmu_caps, - &n->cpu_pmu_caps, - &n->max_branches); + ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps, + &pmu_caps[i].caps, + &pmu_caps[i].max_branches); if (ret) goto err; - n->pmu_name = do_read_string(ff); - if (!n->pmu_name) { + pmu_caps[i].pmu_name = do_read_string(ff); + if (!pmu_caps[i].pmu_name) { ret = -1; goto err; } + if (!pmu_caps[i].nr_caps) { + pr_debug("%s pmu capabilities not available\n", + pmu_caps[i].pmu_name); + } } - ff->ph->env.nr_hybrid_cpc_nodes = nr_pmu; - ff->ph->env.hybrid_cpc_nodes = nodes; + ff->ph->env.nr_pmus_with_caps = nr_pmu; + ff->ph->env.pmu_caps = pmu_caps; return 0; err: for (i = 0; i < nr_pmu; i++) { - free(nodes[i].cpu_pmu_caps); - free(nodes[i].pmu_name); + for (j = 0; j < pmu_caps[i].nr_caps; j++) + free(pmu_caps[i].caps[j]); + free(pmu_caps[i].caps); + free(pmu_caps[i].pmu_name); } - free(nodes); + free(pmu_caps); return ret; } @@ -3381,7 +3398,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), FEAT_OPR(CLOCK_DATA, clock_data, false), FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true), - FEAT_OPR(HYBRID_CPU_PMU_CAPS, hybrid_cpu_pmu_caps, false), + FEAT_OPR(PMU_CAPS, pmu_caps, false), }; struct header_print_data { @@ -4363,6 +4380,9 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, struct evsel *evsel; struct perf_cpu_map *map; + if (dump_trace) + perf_event__fprintf_event_update(event, stdout); + if (!pevlist || *pevlist == NULL) return -EINVAL; diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 56916dabce7b..2d5e601ba60f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -46,7 +46,7 @@ enum { HEADER_CPU_PMU_CAPS, HEADER_CLOCK_DATA, HEADER_HYBRID_TOPOLOGY, - HEADER_HYBRID_CPU_PMU_CAPS, + HEADER_PMU_CAPS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 9a1c7e63e663..0112e1c36418 100644 --- a/tools/perf/util/pmu.c 
+++ b/tools/perf/util/pmu.c @@ -1890,7 +1890,11 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) const char *sysfs = sysfs__mountpoint(); DIR *caps_dir; struct dirent *evt_ent; - int nr_caps = 0; + + if (pmu->caps_initialized) + return pmu->nr_caps; + + pmu->nr_caps = 0; if (!sysfs) return -1; @@ -1898,8 +1902,10 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) snprintf(caps_path, PATH_MAX, "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name); - if (stat(caps_path, &st) < 0) + if (stat(caps_path, &st) < 0) { + pmu->caps_initialized = true; return 0; /* no error if caps does not exist */ + } caps_dir = opendir(caps_path); if (!caps_dir) @@ -1926,13 +1932,14 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) continue; } - nr_caps++; + pmu->nr_caps++; fclose(file); } closedir(caps_dir); - return nr_caps; + pmu->caps_initialized = true; + return pmu->nr_caps; } void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 541889fa9f9c..4b45fd8da5a3 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -46,6 +46,8 @@ struct perf_pmu { struct perf_cpu_map *cpus; struct list_head format; /* HEAD struct perf_pmu_format -> list */ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + bool caps_initialized; + u32 nr_caps; struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ struct list_head hybrid_list; diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 5b09ecbb05dc..b529636ab3ea 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -121,7 +121,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call evlist__for_each_entry(evlist, evsel) evsel__config_leader_sampling(evsel, evlist); - if (opts->full_auxtrace) { + if (opts->full_auxtrace || opts->sample_identifier) { /* * Need to be able to synthesize and parse selected events with * arbitrary sample types, 
which requires always being able to diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index be9a957501f4..4269e916f450 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -28,6 +28,7 @@ struct record_opts { bool sample_time; bool sample_time_set; bool sample_cpu; + bool sample_identifier; bool period; bool period_set; bool running_time; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0aa818977d2b..37f833c3c81b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -562,6 +562,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->feature = process_event_op2_stub; if (tool->compressed == NULL) tool->compressed = perf_session__process_compressed_event; + if (tool->finished_init == NULL) + tool->finished_init = process_event_op2_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -1706,6 +1708,8 @@ static s64 perf_session__process_user_event(struct perf_session *session, if (err) dump_event(session->evlist, event, file_offset, &sample, file_path); return err; + case PERF_RECORD_FINISHED_INIT: + return tool->finished_init(session, event); default: return -EINVAL; } diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 84d17bd4efae..fe5db4bf0042 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1720,14 +1720,17 @@ int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_ size_t nr = 0, i = 0, sz, max_nr, n; int err; - pr_debug2("Synthesizing id index\n"); - max_nr = (UINT16_MAX - sizeof(struct perf_record_id_index)) / sizeof(struct id_index_entry); evlist__for_each_entry(evlist, evsel) nr += evsel->core.ids; + if (!nr) + return 0; + + pr_debug2("Synthesizing id index\n"); + n = nr > max_nr ? 
max_nr : nr; sz = sizeof(struct perf_record_id_index) + n * sizeof(struct id_index_entry); ev = zalloc(sz); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index f2352dba1875..c957fb849ac6 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -76,7 +76,8 @@ struct perf_tool { stat_config, stat, stat_round, - feature; + feature, + finished_init; event_op4 compressed; event_op3 auxtrace; bool ordered_events; |