38 files changed, 1734 insertions, 166 deletions
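
The headline change in this series is VM Time Correlation for Intel PT traces of virtual machines, along with the new 'Z' ("timeless" decoding) itrace option and the hybrid topology perf.data headers. For orientation, a minimal sketch of the new workflow; the PID and the TSC Offset value are illustrative, taken from the documentation examples further down:

 $ sudo perf kvm --guest --host --guestkallsyms $KALLSYMS record --kcore -e intel_pt/cyc=1/k -p 16998
 $ perf inject -i perf.data.kvm --vm-time-correlation=dry-run                     # report TSC Offset per VMCS, no file update
 $ perf inject -i perf.data.kvm --vm-time-correlation=0xffffe42722c64c41 --force  # update timestamps in place
 $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --itrace=e-o            # check for decoder errors
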
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 0f1005209a2b..2d586fe5e4c5 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -20,6 +20,7 @@
 		L	synthesize last branch entries on existing event records
 		s	skip initial number of events
 		q	quicker (less detailed) decoding
+		Z	prefer to ignore timestamps (so-called "timeless" decoding)
 
 	The default is all events i.e. the same as --itrace=ibxwpe,
 	except for perf script where it is --itrace=ce
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index a8eccff21281..91108fe3ad5f 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -68,6 +68,16 @@ include::itrace.txt[]
 --force::
 	Don't complain, do it.
 
+--vm-time-correlation[=OPTIONS]::
+	Some architectures may capture AUX area data which contains timestamps
+	affected by virtualization. This option will update those timestamps
+	in place, to correlate with host timestamps. The in-place update means
+	that an output file is not specified, and instead the input file is
+	modified. The options are architecture specific, except that they may
+	start with "dry-run" which will cause the file to be processed but
+	without updating it. Currently this option is supported only by
+	Intel PT; refer to linkperf:perf-intel-pt[1].
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index bcf3eca5afbe..e382dbd4ff0a 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -869,6 +869,7 @@ The letters are:
 		L	synthesize last branch entries on existing event records
 		s	skip initial number of events
 		q	quicker (less detailed) decoding
+		Z	prefer to ignore timestamps (so-called "timeless" decoding)
 
 "Instructions" events look like they were recorded by "perf record -e instructions".
 
@@ -1062,6 +1063,10 @@ What *will* be decoded with the qq option:
 
 	- instruction pointer associated with PSB packets
 
+The Z option is equivalent to having recorded a trace without TSC
+(i.e. config term tsc=0). It can be useful to avoid timestamp issues when
+decoding a trace of a virtual machine.
+
 dump option
 ~~~~~~~~~~~
 
@@ -1150,8 +1155,9 @@ include::build-xed.txt[]
 Tracing Virtual Machines
 ------------------------
 
-Currently, only kernel tracing is supported and only with "timeless" decoding
-i.e. no TSC timestamps
+Currently, only kernel tracing is supported and only with either "timeless" decoding
+(i.e. no TSC timestamps) or VM Time Correlation. VM Time Correlation is an extra step
+using 'perf inject' and requires unchanging VMX TSC Offset and no VMX TSC Scaling.
 
 Other limitations and caveats
 
@@ -1162,7 +1168,7 @@ Other limitations and caveats
  Guest VCPU is unknown but may be able to be inferred from the host thread
  Callchains are not supported
 
-Example
+Example using "timeless" decoding
 
 Start VM
 
@@ -1226,6 +1232,107 @@ perf script can be used to provide an instruction trace
 	:1440  1440 ffffffffbb74603c clockevents_program_event+0x4c ([guest.kernel.kallsyms])	popq %rbx
 	:1440  1440 ffffffffbb74603d clockevents_program_event+0x4d ([guest.kernel.kallsyms])	popq %r12
 
+Example using VM Time Correlation
+
+Start VM
+
+ $ sudo virsh start kubuntu20.04
+ Domain kubuntu20.04 started
+
+Mount the guest file system. Note sshfs needs -o direct_io to enable reading of proc files.
+root access is needed to read /proc/kcore.
+
+ $ mkdir -p vm0
+ $ sshfs -o direct_io root@vm0:/ vm0
+
+Copy the guest /proc/kallsyms, /proc/modules and /proc/kcore
+
+ $ perf buildid-cache -v --kcore vm0/proc/kcore
+ same kcore found in /home/user/.debug/[kernel.kcore]/cc9c55a98c5e4ec0aeda69302554aabed5cd6491/2021021312450777
+ $ KALLSYMS=/home/user/.debug/\[kernel.kcore\]/cc9c55a98c5e4ec0aeda69302554aabed5cd6491/2021021312450777/kallsyms
+
+Find the VM process
+
+ $ ps -eLl | grep 'KVM\|PID'
+ F S   UID     PID    PPID     LWP  C PRI  NI ADDR SZ WCHAN  TTY          TIME CMD
+ 3 S 64055   16998       1   17005 13  80   0 - 1818189 -    ?        00:00:16 CPU 0/KVM
+ 3 S 64055   16998       1   17006  4  80   0 - 1818189 -    ?        00:00:05 CPU 1/KVM
+ 3 S 64055   16998       1   17007  3  80   0 - 1818189 -    ?        00:00:04 CPU 2/KVM
+ 3 S 64055   16998       1   17008  4  80   0 - 1818189 -    ?        00:00:05 CPU 3/KVM
+
+Start an open-ended perf record, tracing the VM process, do something on the VM, and then ctrl-C to stop.
+IPC can be determined, hence cyc=1 can be added.
+Only kernel decoding is supported, so 'k' must be specified.
+Intel PT traces both the host and the guest so --guest and --host need to be specified.
+
+ $ sudo perf kvm --guest --host --guestkallsyms $KALLSYMS record --kcore -e intel_pt/cyc=1/k -p 16998
+ ^C[ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 9.041 MB perf.data.kvm ]
+
+Now 'perf inject' can be used to determine the VMX TSC Offset. Note, Intel PT TSC packets are
+only 7 bytes, so the TSC Offset might differ from the actual value in the 8th byte. That will
+have no effect i.e. the resulting timestamps will be correct anyway.
+
+ $ perf inject -i perf.data.kvm --vm-time-correlation=dry-run
+ ERROR: Unknown TSC Offset for VMCS 0x1bff6a
+ VMCS: 0x1bff6a  TSC Offset 0xffffe42722c64c41
+ ERROR: Unknown TSC Offset for VMCS 0x1cbc08
+ VMCS: 0x1cbc08  TSC Offset 0xffffe42722c64c41
+ ERROR: Unknown TSC Offset for VMCS 0x1c3ce8
+ VMCS: 0x1c3ce8  TSC Offset 0xffffe42722c64c41
+ ERROR: Unknown TSC Offset for VMCS 0x1cbce9
+ VMCS: 0x1cbce9  TSC Offset 0xffffe42722c64c41
+
+Each virtual CPU has a different Virtual Machine Control Structure (VMCS)
+shown above with the calculated TSC Offset. For an unchanging TSC Offset,
+they should all be the same for the same virtual machine.
+
+Now that the TSC Offset is known, it can be provided to 'perf inject'
+
+ $ perf inject -i perf.data.kvm --vm-time-correlation="dry-run 0xffffe42722c64c41"
+
+Note the options for 'perf inject' --vm-time-correlation are:
+
+ [ dry-run ] [ <TSC Offset> [ : <VMCS> [ , <VMCS> ]... ] ]...
+
+So it is possible to specify different TSC Offsets for different VMCSs (an
+illustrative example follows the error check below).
+The option "dry-run" will cause the file to be processed but without updating it.
+Note it is also possible to get an intel_pt.log file by adding the option --itrace=d
+
+There were no errors, so do it for real
+
+ $ perf inject -i perf.data.kvm --vm-time-correlation=0xffffe42722c64c41 --force
+
+'perf script' can be used to see if there are any decoder errors
+
+ $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --itrace=e-o
+
+There were none.
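+
+Had the dry run reported different TSC Offsets for different VMCSs, the option
+grammar above allows one offset per group of VMCSs. A sketch, with offset and
+VMCS values that are purely illustrative:
+
+ $ perf inject -i perf.data.kvm --vm-time-correlation="0xffffe42722c64c41:0x1bff6a,0x1cbc08 0xffffe42722c64c42:0x1c3ce8,0x1cbce9" --force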
+
+'perf script' can be used to provide an instruction trace showing timestamps
+
+ $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --insn-trace --xed -F+ipc | grep -C10 vmresume | head -21
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cdd __vmx_vcpu_run+0x3d ([kernel.kallsyms]) movq 0x48(%rax), %r9
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce1 __vmx_vcpu_run+0x41 ([kernel.kallsyms]) movq 0x50(%rax), %r10
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce5 __vmx_vcpu_run+0x45 ([kernel.kallsyms]) movq 0x58(%rax), %r11
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce9 __vmx_vcpu_run+0x49 ([kernel.kallsyms]) movq 0x60(%rax), %r12
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ced __vmx_vcpu_run+0x4d ([kernel.kallsyms]) movq 0x68(%rax), %r13
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf1 __vmx_vcpu_run+0x51 ([kernel.kallsyms]) movq 0x70(%rax), %r14
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf5 __vmx_vcpu_run+0x55 ([kernel.kallsyms]) movq 0x78(%rax), %r15
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf9 __vmx_vcpu_run+0x59 ([kernel.kallsyms]) movq (%rax), %rax
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cfc __vmx_vcpu_run+0x5c ([kernel.kallsyms]) callq 0xffffffff82133c40
+ CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133c40 vmx_vmenter+0x0 ([kernel.kallsyms]) jz 0xffffffff82133c46
+ CPU 1/KVM 17006 [001] 11500.262866075: ffffffff82133c42 vmx_vmenter+0x2 ([kernel.kallsyms]) vmresume IPC: 0.05 (40/769)
+ :17006 17006 [001] 11500.262869216: ffffffff82200cb0 asm_sysvec_apic_timer_interrupt+0x0 ([guest.kernel.kallsyms]) clac
+ :17006 17006 [001] 11500.262869216: ffffffff82200cb3 asm_sysvec_apic_timer_interrupt+0x3 ([guest.kernel.kallsyms]) pushq $0xffffffffffffffff
+ :17006 17006 [001] 11500.262869216: ffffffff82200cb5 asm_sysvec_apic_timer_interrupt+0x5 ([guest.kernel.kallsyms]) callq 0xffffffff82201160
+ :17006 17006 [001] 11500.262869216: ffffffff82201160 error_entry+0x0 ([guest.kernel.kallsyms]) cld
+ :17006 17006 [001] 11500.262869216: ffffffff82201161 error_entry+0x1 ([guest.kernel.kallsyms]) pushq %rsi
+ :17006 17006 [001] 11500.262869216: ffffffff82201162 error_entry+0x2 ([guest.kernel.kallsyms]) movq 0x8(%rsp), %rsi
+ :17006 17006 [001] 11500.262869216: ffffffff82201167 error_entry+0x7 ([guest.kernel.kallsyms]) movq %rdi, 0x8(%rsp)
+ :17006 17006 [001] 11500.262869216: ffffffff8220116c error_entry+0xc ([guest.kernel.kallsyms]) pushq %rdx
+ :17006 17006 [001] 11500.262869216: ffffffff8220116d error_entry+0xd ([guest.kernel.kallsyms]) pushq %rcx
+ :17006 17006 [001] 11500.262869216: ffffffff8220116e error_entry+0xe ([guest.kernel.kallsyms]) pushq %rax
+
 SEE ALSO
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 9ee96640744e..e6ff8c898ada 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -402,6 +402,39 @@ struct {
 	u64 clockid_time_ns;
 };
 
+	HEADER_HYBRID_TOPOLOGY = 30,
+
+Indicates the hybrid CPUs. The format of the data is as below.
+
+struct {
+	u32 nr;
+	struct {
+		char pmu_name[];
+		char cpus[];
+	} [nr]; /* Variable length records */
+};
+
+Example:
+ hybrid cpu system:
+ cpu_core cpu list : 0-15
+ cpu_atom cpu list : 16-23
+
+	HEADER_HYBRID_CPU_PMU_CAPS = 31,
+
+	A list of hybrid CPU PMU capabilities.
+ +struct { + u32 nr_pmu; + struct { + u32 nr_cpu_pmu_caps; + { + char name[]; + char value[]; + } [nr_cpu_pmu_caps]; + char pmu_name[]; + } [nr_pmu]; +}; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 406a9519145e..829e0498bd51 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -635,7 +635,7 @@ endif ifdef BUILD_BPF_SKEL $(call feature_check,clang-bpf-co-re) ifeq ($(feature-clang-bpf-co-re), 0) - dummy := $(error Error: clang too old. Please install recent clang) + dummy := $(error Error: clang too old/not installed. Please install recent clang to build with BUILD_BPF_SKEL) endif $(call detected,CONFIG_PERF_BPF_SKEL) CFLAGS += -DHAVE_BPF_SKEL diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h index 90ec4c8cb880..c62538052404 100644 --- a/tools/perf/arch/arm/include/arch-tests.h +++ b/tools/perf/arch/arm/include/arch-tests.h @@ -2,11 +2,6 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -#ifdef HAVE_DWARF_UNWIND_SUPPORT -struct thread; -struct perf_sample; -#endif - extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h index 90ec4c8cb880..c62538052404 100644 --- a/tools/perf/arch/arm64/include/arch-tests.h +++ b/tools/perf/arch/arm64/include/arch-tests.h @@ -2,11 +2,6 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -#ifdef HAVE_DWARF_UNWIND_SUPPORT -struct thread; -struct perf_sample; -#endif - extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h index 1c7be75cbc78..c62538052404 100644 --- a/tools/perf/arch/powerpc/include/arch-tests.h +++ b/tools/perf/arch/powerpc/include/arch-tests.h @@ -2,13 +2,6 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -#ifdef HAVE_DWARF_UNWIND_SUPPORT -struct thread; -struct perf_sample; -int test__arch_unwind_sample(struct perf_sample *sample, - struct thread *thread); -#endif - extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c index 8efd9ed9e9db..c9cb4b059392 100644 --- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -7,7 +7,6 @@ #include "event.h" #include "debug.h" #include "tests/tests.h" -#include "arch-tests.h" #define STACK_SIZE 8192 diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 0e20f3dc69f3..9599e7a3f1af 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -2,23 +2,15 @@ #ifndef ARCH_TESTS_H #define ARCH_TESTS_H -#include <linux/compiler.h> struct test; /* Tests */ -int test__rdpmc(struct test *test __maybe_unused, int subtest); -int test__insn_x86(struct test *test __maybe_unused, int subtest); +int test__rdpmc(struct test *test, int subtest); +int test__insn_x86(struct test *test, int subtest); int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); int test__x86_sample_parsing(struct test *test, int subtest); -#ifdef HAVE_DWARF_UNWIND_SUPPORT -struct thread; -struct perf_sample; -int test__arch_unwind_sample(struct perf_sample *sample, - struct thread *thread); -#endif - extern struct test arch_tests[]; #endif diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c 
b/tools/perf/arch/x86/tests/dwarf-unwind.c index 478078fb0f22..a54dea7c112f 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -7,7 +7,6 @@ #include "event.h" #include "debug.h" #include "tests/tests.h" -#include "arch-tests.h" #define STACK_SIZE 8192 diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 072920475b65..c5dd54f6ef5e 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -133,11 +133,56 @@ static struct kvm_events_ops ioport_events = { .name = "IO Port Access" }; + /* The time of emulation msr is from kvm_msr to kvm_entry. */ +static void msr_event_get_key(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->key = evsel__intval(evsel, sample, "ecx"); + key->info = evsel__intval(evsel, sample, "write"); +} + +static bool msr_event_begin(struct evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + if (!strcmp(evsel->name, "kvm:kvm_msr")) { + msr_event_get_key(evsel, sample, key); + return true; + } + + return false; +} + +static bool msr_event_end(struct evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return kvm_entry_event(evsel); +} + +static void msr_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + scnprintf(decode, decode_str_len, "%#llx:%s", + (unsigned long long)key->key, + key->info ? "W" : "R"); +} + +static struct kvm_events_ops msr_events = { + .is_begin_event = msr_event_begin, + .is_end_event = msr_event_end, + .decode_key = msr_event_decode_key, + .name = "MSR Access" +}; + const char *kvm_events_tp[] = { "kvm:kvm_entry", "kvm:kvm_exit", "kvm:kvm_mmio", "kvm:kvm_pio", + "kvm:kvm_msr", NULL, }; @@ -145,6 +190,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = { { .name = "vmexit", .ops = &exit_events }, { .name = "mmio", .ops = &mmio_events }, { .name = "ioport", .ops = &ioport_events }, + { .name = "msr", .ops = &msr_events }, { NULL, NULL }, }; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index ddccc0eb7390..102cafb0c0b3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -31,6 +31,7 @@ #include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */ #include <linux/list.h> +#include <linux/string.h> #include <errno.h> #include <signal.h> @@ -43,6 +44,8 @@ struct perf_inject { bool have_auxtrace; bool strip; bool jit_mode; + bool in_place_update; + bool in_place_update_dry_run; const char *input_name; struct perf_data output; u64 bytes_written; @@ -696,12 +699,42 @@ static void strip_init(struct perf_inject *inject) evsel->handler = drop_sample; } +static int parse_vm_time_correlation(const struct option *opt, const char *str, int unset) +{ + struct perf_inject *inject = opt->value; + const char *args; + char *dry_run; + + if (unset) + return 0; + + inject->itrace_synth_opts.set = true; + inject->itrace_synth_opts.vm_time_correlation = true; + inject->in_place_update = true; + + if (!str) + return 0; + + dry_run = skip_spaces(str); + if (!strncmp(dry_run, "dry-run", strlen("dry-run"))) { + inject->itrace_synth_opts.vm_tm_corr_dry_run = true; + inject->in_place_update_dry_run = true; + args = dry_run + strlen("dry-run"); + } else { + args = str; + } + + inject->itrace_synth_opts.vm_tm_corr_args = strdup(args); + + return inject->itrace_synth_opts.vm_tm_corr_args ? 
0 : -ENOMEM; +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; struct perf_session *session = inject->session; struct perf_data *data_out = &inject->output; - int fd = perf_data__fd(data_out); + int fd = inject->in_place_update ? -1 : perf_data__fd(data_out); u64 output_data_offset; signal(SIGINT, sig_handler); @@ -737,6 +770,15 @@ static int __cmd_inject(struct perf_inject *inject) else if (!strncmp(name, "sched:sched_stat_", 17)) evsel->handler = perf_inject__sched_stat; } + } else if (inject->itrace_synth_opts.vm_time_correlation) { + session->itrace_synth_opts = &inject->itrace_synth_opts; + memset(&inject->tool, 0, sizeof(inject->tool)); + inject->tool.id_index = perf_event__process_id_index; + inject->tool.auxtrace_info = perf_event__process_auxtrace_info; + inject->tool.auxtrace = perf_event__process_auxtrace; + inject->tool.auxtrace_error = perf_event__process_auxtrace_error; + inject->tool.ordered_events = true; + inject->tool.ordering_requires_timestamps = true; } else if (inject->itrace_synth_opts.set) { session->itrace_synth_opts = &inject->itrace_synth_opts; inject->itrace_synth_opts.inject = true; @@ -759,14 +801,14 @@ static int __cmd_inject(struct perf_inject *inject) if (!inject->itrace_synth_opts.set) auxtrace_index__free(&session->auxtrace_index); - if (!data_out->is_pipe) + if (!data_out->is_pipe && !inject->in_place_update) lseek(fd, output_data_offset, SEEK_SET); ret = perf_session__process_events(session); if (ret) return ret; - if (!data_out->is_pipe) { + if (!data_out->is_pipe && !inject->in_place_update) { if (inject->build_ids) perf_header__set_feat(&session->header, HEADER_BUILD_ID); @@ -878,6 +920,9 @@ int cmd_inject(int argc, const char **argv) itrace_parse_synth_opts), OPT_BOOLEAN(0, "strip", &inject.strip, "strip non-synthesized events (use with --itrace)"), + OPT_CALLBACK_OPTARG(0, "vm-time-correlation", &inject, NULL, "opts", + "correlate time between VM guests and the host", + parse_vm_time_correlation), OPT_END() }; const char * const inject_usage[] = { @@ -900,7 +945,23 @@ int cmd_inject(int argc, const char **argv) return -1; } - if (perf_data__open(&inject.output)) { + if (inject.in_place_update) { + if (!strcmp(inject.input_name, "-")) { + pr_err("Input file name required for in-place updating\n"); + return -1; + } + if (strcmp(inject.output.path, "-")) { + pr_err("Output file name must not be specified for in-place updating\n"); + return -1; + } + if (!data.force && !inject.in_place_update_dry_run) { + pr_err("The input file would be updated in place, " + "the --force option is required.\n"); + return -1; + } + if (!inject.in_place_update_dry_run) + data.in_place_update = true; + } else if (perf_data__open(&inject.output)) { perror("failed to create output file"); return -1; } @@ -950,5 +1011,6 @@ int cmd_inject(int argc, const char **argv) out_delete: zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + free(inject.itrace_synth_opts.vm_tm_corr_args); return ret; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3337b5f93336..bc3dd379eb67 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -969,6 +969,15 @@ out: return rc; } +static void set_timestamp_boundary(struct record *rec, u64 sample_time) +{ + if (rec->evlist->first_sample_time == 0) + rec->evlist->first_sample_time = sample_time; + + if (sample_time) + rec->evlist->last_sample_time = sample_time; +} + static int process_sample_event(struct perf_tool *tool, union perf_event *event, 
struct perf_sample *sample, @@ -977,10 +986,7 @@ static int process_sample_event(struct perf_tool *tool, { struct record *rec = container_of(tool, struct record, tool); - if (rec->evlist->first_sample_time == 0) - rec->evlist->first_sample_time = sample->time; - - rec->evlist->last_sample_time = sample->time; + set_timestamp_boundary(rec, sample->time); if (rec->buildid_all) return 0; @@ -2402,6 +2408,17 @@ static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *eve return perf_event__process_mmap2(tool, event, sample, machine); } +static int process_timestamp_boundary(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct machine *machine __maybe_unused) +{ + struct record *rec = container_of(tool, struct record, tool); + + set_timestamp_boundary(rec, sample->time); + return 0; +} + /* * XXX Ideally would be local to cmd_record() and passed to a record__new * because we need to have access to it in record__exit, that is called @@ -2436,6 +2453,8 @@ static struct record record = { .namespaces = perf_event__process_namespaces, .mmap = build_id__process_mmap, .mmap2 = build_id__process_mmap2, + .itrace_start = process_timestamp_boundary, + .aux = process_timestamp_boundary, .ordered_events = true, }, }; diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 83638097c3bc..a288035eb362 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -17,10 +17,6 @@ #include "callchain.h" #include "util/synthetic-events.h" -#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__) -#include "arch-tests.h" -#endif - /* For bsearch. We try to unwind functions in shared object. */ #include <stdlib.h> diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 94bd5d215d94..da013e90a945 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -84,9 +84,11 @@ make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 +make_libbpf_dynamic := LIBBPF_DYNAMIC=1 make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1 make_no_libcrypto := NO_LIBCRYPTO=1 make_with_babeltrace:= LIBBABELTRACE=1 +make_with_coresight := CORESIGHT=1 make_no_sdt := NO_SDT=1 make_no_syscall_tbl := NO_SYSCALL_TABLE=1 make_with_clangllvm := LIBCLANGLLVM=1 @@ -148,11 +150,13 @@ run += make_no_libaudit run += make_no_libbionic run += make_no_auxtrace run += make_no_libbpf +run += make_libbpf_dynamic run += make_no_libbpf_DEBUG run += make_no_libcrypto run += make_no_sdt run += make_no_syscall_tbl run += make_with_babeltrace +run += make_with_coresight run += make_with_clangllvm run += make_with_libpfm4 run += make_help @@ -266,6 +270,9 @@ test_make_install_info_O := $(test_ok) test_make_install_pdf := $(test_ok) test_make_install_pdf_O := $(test_ok) +test_make_libbpf_dynamic := ldd $(PERF_O)/perf | grep -q libbpf +test_make_libbpf_dynamic_O := ldd $$TMP_O/perf | grep -q libbpf + test_make_python_perf_so_O := test -f $$TMP_O/python/perf.so test_make_perf_o_O := test -f $$TMP_O/perf.o test_make_util_map_o_O := test -f $$TMP_O/util/map.o diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index b85f005308a3..1100dd55b657 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -133,14 +133,12 @@ bool test__bp_account_is_supported(void); bool test__wp_is_supported(void); bool test__tsc_is_supported(void); -#if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; 
struct perf_sample; int test__arch_unwind_sample(struct perf_sample *sample, struct thread *thread); #endif -#endif #if defined(__arm__) int test__vectors_page(struct test *test, int subtest); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 1b4091a3b508..62268f8b6c4f 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1120,8 +1120,9 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events) auxtrace_queue_data_cb, &qd); } -void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd) +void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw) { + int prot = rw ? PROT_READ | PROT_WRITE : PROT_READ; size_t adj = buffer->data_offset & (page_size - 1); size_t size = buffer->size + adj; off_t file_offset = buffer->data_offset - adj; @@ -1130,7 +1131,7 @@ void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd) if (buffer->data) return buffer->data; - addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset); + addr = mmap(NULL, size, prot, MAP_SHARED, fd, file_offset); if (addr == MAP_FAILED) return NULL; @@ -1569,6 +1570,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, case 'q': synth_opts->quick += 1; break; + case 'Z': + synth_opts->timeless_decoding = true; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index a4fbb33b7245..472c0973b1f1 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -89,6 +89,10 @@ enum itrace_period_type { * @tlb: whether to synthesize TLB events * @remote_access: whether to synthesize remote access events * @mem: whether to synthesize memory events + * @timeless_decoding: prefer "timeless" decoding i.e. 
ignore timestamps + * @vm_time_correlation: perform VM Time Correlation + * @vm_tm_corr_dry_run: VM Time Correlation dry-run + * @vm_tm_corr_args: VM Time Correlation implementation-specific arguments * @callchain_sz: maximum callchain size * @last_branch_sz: branch context size * @period: 'instructions' events period @@ -128,6 +132,10 @@ struct itrace_synth_opts { bool tlb; bool remote_access; bool mem; + bool timeless_decoding; + bool vm_time_correlation; + bool vm_tm_corr_dry_run; + char *vm_tm_corr_args; unsigned int callchain_sz; unsigned int last_branch_sz; unsigned long long period; @@ -525,7 +533,11 @@ int auxtrace_queue_data(struct perf_session *session, bool samples, bool events); struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue, struct auxtrace_buffer *buffer); -void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd); +void *auxtrace_buffer__get_data_rw(struct auxtrace_buffer *buffer, int fd, bool rw); +static inline void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd) +{ + return auxtrace_buffer__get_data_rw(buffer, fd, false); +} void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer); void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer); void auxtrace_buffer__free(struct auxtrace_buffer *buffer); diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index 1b52402a8923..ec77e2a7b3ca 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -12,6 +12,7 @@ #include "cpumap.h" #include "debug.h" #include "env.h" +#include "pmu-hybrid.h" #define CORE_SIB_FMT \ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" @@ -351,3 +352,82 @@ void numa_topology__delete(struct numa_topology *tp) free(tp); } + +static int load_hybrid_node(struct hybrid_topology_node *node, + struct perf_pmu *pmu) +{ + const char *sysfs; + char path[PATH_MAX]; + char *buf = NULL, *p; + FILE *fp; + size_t len = 0; + + node->pmu_name = strdup(pmu->name); + if (!node->pmu_name) + return -1; + + sysfs = sysfs__mountpoint(); + if (!sysfs) + goto err; + + snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, pmu->name); + fp = fopen(path, "r"); + if (!fp) + goto err; + + if (getline(&buf, &len, fp) <= 0) { + fclose(fp); + goto err; + } + + p = strchr(buf, '\n'); + if (p) + *p = '\0'; + + fclose(fp); + node->cpus = buf; + return 0; + +err: + zfree(&node->pmu_name); + free(buf); + return -1; +} + +struct hybrid_topology *hybrid_topology__new(void) +{ + struct perf_pmu *pmu; + struct hybrid_topology *tp = NULL; + u32 nr, i = 0; + + nr = perf_pmu__hybrid_pmu_num(); + if (nr == 0) + return NULL; + + tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0]) * nr); + if (!tp) + return NULL; + + tp->nr = nr; + perf_pmu__for_each_hybrid_pmu(pmu) { + if (load_hybrid_node(&tp->nodes[i], pmu)) { + hybrid_topology__delete(tp); + return NULL; + } + i++; + } + + return tp; +} + +void hybrid_topology__delete(struct hybrid_topology *tp) +{ + u32 i; + + for (i = 0; i < tp->nr; i++) { + zfree(&tp->nodes[i].pmu_name); + zfree(&tp->nodes[i].cpus); + } + + free(tp); +} diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h index 6201c3790d86..d9af97177068 100644 --- a/tools/perf/util/cputopo.h +++ b/tools/perf/util/cputopo.h @@ -25,10 +25,23 @@ struct numa_topology { struct numa_topology_node nodes[]; }; +struct hybrid_topology_node { + char *pmu_name; + char *cpus; +}; + +struct hybrid_topology { + u32 nr; + struct hybrid_topology_node nodes[]; +}; + struct cpu_topology *cpu_topology__new(void); void cpu_topology__delete(struct 
cpu_topology *tp); struct numa_topology *numa_topology__new(void); void numa_topology__delete(struct numa_topology *tp); +struct hybrid_topology *hybrid_topology__new(void); +void hybrid_topology__delete(struct hybrid_topology *tp); + #endif /* __PERF_CPUTOPO_H */ diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 059bcec3f651..3e1a05bc82cc 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,6 +6,7 @@ * Author: Mathieu Poirier <mathieu.poirier@linaro.org> */ +#include <asm/bug.h> #include <linux/coresight-pmu.h> #include <linux/err.h> #include <linux/list.h> @@ -17,6 +18,7 @@ #include "cs-etm.h" #include "cs-etm-decoder.h" +#include "debug.h" #include "intlist.h" /* use raw logging */ @@ -276,13 +278,13 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, const uint8_t trace_chan_id) { /* No timestamp packet has been received, nothing to do */ - if (!packet_queue->timestamp) + if (!packet_queue->cs_timestamp) return OCSD_RESP_CONT; - packet_queue->timestamp = packet_queue->next_timestamp; + packet_queue->cs_timestamp = packet_queue->next_cs_timestamp; /* Estimate the timestamp for the next range packet */ - packet_queue->next_timestamp += packet_queue->instr_count; + packet_queue->next_cs_timestamp += packet_queue->instr_count; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -294,7 +296,8 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, - const uint8_t trace_chan_id) + const uint8_t trace_chan_id, + const ocsd_trc_index_t indx) { struct cs_etm_packet_queue *packet_queue; @@ -308,20 +311,39 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * Function do_soft_timestamp() will report the value to the front end, * hence asking the decoder to keep decoding rather than stopping. */ - if (packet_queue->timestamp) { - packet_queue->next_timestamp = elem->timestamp; + if (packet_queue->cs_timestamp) { + packet_queue->next_cs_timestamp = elem->timestamp; return OCSD_RESP_CONT; } - /* - * This is the first timestamp we've seen since the beginning of traces - * or a discontinuity. Since timestamps packets are generated *after* - * range packets have been generated, we need to estimate the time at - * which instructions started by subtracting the number of instructions - * executed to the timestamp. - */ - packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; - packet_queue->next_timestamp = elem->timestamp; + + if (!elem->timestamp) { + /* + * Zero timestamps can be seen due to misconfiguration or hardware bugs. + * Warn once, and don't try to subtract instr_count as it would result in an + * underflow. + */ + packet_queue->cs_timestamp = 0; + WARN_ONCE(true, "Zero Coresight timestamp found at Idx:%" OCSD_TRC_IDX_STR + ". Decoding may be improved with --itrace=Z...\n", indx); + } else if (packet_queue->instr_count > elem->timestamp) { + /* + * Sanity check that the elem->timestamp - packet_queue->instr_count would not + * result in an underflow. Warn and clamp at 0 if it would. + */ + packet_queue->cs_timestamp = 0; + pr_err("Timestamp calculation underflow at Idx:%" OCSD_TRC_IDX_STR "\n", indx); + } else { + /* + * This is the first timestamp we've seen since the beginning of traces + * or a discontinuity. 
Since timestamps packets are generated *after* + * range packets have been generated, we need to estimate the time at + * which instructions started by subtracting the number of instructions + * executed to the timestamp. + */ + packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count; + } + packet_queue->next_cs_timestamp = elem->timestamp; packet_queue->instr_count = 0; /* Tell the front end which traceid_queue needs attention */ @@ -334,8 +356,8 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, static void cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue) { - packet_queue->timestamp = 0; - packet_queue->next_timestamp = 0; + packet_queue->cs_timestamp = 0; + packet_queue->next_cs_timestamp = 0; packet_queue->instr_count = 0; } @@ -542,7 +564,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const void *context, - const ocsd_trc_index_t indx __maybe_unused, + const ocsd_trc_index_t indx, const u8 trace_chan_id __maybe_unused, const ocsd_generic_trace_elem *elem) { @@ -579,7 +601,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( break; case OCSD_GEN_TRC_ELEM_TIMESTAMP: resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, - trace_chan_id); + trace_chan_id, + indx); break; case OCSD_GEN_TRC_ELEM_PE_CONTEXT: resp = cs_etm_decoder__set_tid(etmq, packet_queue, diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 7e63e7dedc33..64536a6ed10a 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -38,8 +38,6 @@ #include <tools/libc_compat.h> #include "util/synthetic-events.h" -#define MAX_TIMESTAMP (~0ULL) - struct cs_etm_auxtrace { struct auxtrace auxtrace; struct auxtrace_queues queues; @@ -56,6 +54,7 @@ struct cs_etm_auxtrace { u8 sample_instructions; int num_cpu; + u64 latest_kernel_timestamp; u32 auxtrace_type; u64 branches_sample_type; u64 branches_id; @@ -86,7 +85,7 @@ struct cs_etm_queue { struct cs_etm_decoder *decoder; struct auxtrace_buffer *buffer; unsigned int queue_nr; - u8 pending_timestamp; + u8 pending_timestamp_chan_id; u64 offset; const unsigned char *buf; size_t buf_len, buf_used; @@ -208,7 +207,7 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, * be more than one channel per cs_etm_queue, we need to specify * what traceID queue needs servicing. 
*/ - etmq->pending_timestamp = trace_chan_id; + etmq->pending_timestamp_chan_id = trace_chan_id; } static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, @@ -216,22 +215,22 @@ static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq, { struct cs_etm_packet_queue *packet_queue; - if (!etmq->pending_timestamp) + if (!etmq->pending_timestamp_chan_id) return 0; if (trace_chan_id) - *trace_chan_id = etmq->pending_timestamp; + *trace_chan_id = etmq->pending_timestamp_chan_id; packet_queue = cs_etm__etmq_get_packet_queue(etmq, - etmq->pending_timestamp); + etmq->pending_timestamp_chan_id); if (!packet_queue) return 0; /* Acknowledge pending status */ - etmq->pending_timestamp = 0; + etmq->pending_timestamp_chan_id = 0; /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */ - return packet_queue->timestamp; + return packet_queue->cs_timestamp; } static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue) @@ -814,7 +813,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, int ret = 0; unsigned int cs_queue_nr; u8 trace_chan_id; - u64 timestamp; + u64 cs_timestamp; struct cs_etm_queue *etmq = queue->priv; if (list_empty(&queue->head) || etmq) @@ -854,7 +853,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, /* * Run decoder on the trace block. The decoder will stop when - * encountering a timestamp, a full packet queue or the end of + * encountering a CS timestamp, a full packet queue or the end of * trace for that block. */ ret = cs_etm__decode_data_block(etmq); @@ -865,10 +864,10 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all * the timestamp calculation for us. */ - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); /* We found a timestamp, no need to continue. */ - if (timestamp) + if (cs_timestamp) break; /* @@ -892,7 +891,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, * queue and will be processed in cs_etm__process_queues(). 
*/ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); out: return ret; } @@ -1194,6 +1193,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); + if (!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = addr; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -1250,6 +1251,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); + if (!etm->timeless_decoding) + sample.time = etm->latest_kernel_timestamp; sample.ip = ip; sample.pid = tidq->pid; sample.tid = tidq->tid; @@ -2221,7 +2224,7 @@ static int cs_etm__process_queues(struct cs_etm_auxtrace *etm) int ret = 0; unsigned int cs_queue_nr, queue_nr; u8 trace_chan_id; - u64 timestamp; + u64 cs_timestamp; struct auxtrace_queue *queue; struct cs_etm_queue *etmq; struct cs_etm_traceid_queue *tidq; @@ -2283,9 +2286,9 @@ refetch: if (ret) goto out; - timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); + cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id); - if (!timestamp) { + if (!cs_timestamp) { /* * Function cs_etm__decode_data_block() returns when * there is no more traces to decode in the current @@ -2308,7 +2311,7 @@ refetch: * this queue/traceID. */ cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id); - ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp); + ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } out: @@ -2380,7 +2383,7 @@ static int cs_etm__process_event(struct perf_session *session, struct perf_tool *tool) { int err = 0; - u64 timestamp; + u64 sample_kernel_timestamp; struct cs_etm_auxtrace *etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); @@ -2394,11 +2397,11 @@ static int cs_etm__process_event(struct perf_session *session, } if (sample->time && (sample->time != (u64) -1)) - timestamp = sample->time; + sample_kernel_timestamp = sample->time; else - timestamp = 0; + sample_kernel_timestamp = 0; - if (timestamp || etm->timeless_decoding) { + if (sample_kernel_timestamp || etm->timeless_decoding) { err = cs_etm__update_queues(etm); if (err) return err; @@ -2414,9 +2417,15 @@ static int cs_etm__process_event(struct perf_session *session, else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) return cs_etm__process_switch_cpu_wide(etm, event); - if (!etm->timeless_decoding && - event->header.type == PERF_RECORD_AUX) + if (!etm->timeless_decoding && event->header.type == PERF_RECORD_AUX) { + /* + * Record the latest kernel timestamp available in the header + * for samples so that synthesised samples occur from this point + * onwards. + */ + etm->latest_kernel_timestamp = sample_kernel_timestamp; return cs_etm__process_queues(etm); + } return 0; } @@ -2464,6 +2473,10 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) struct evlist *evlist = etm->session->evlist; bool timeless_decoding = true; + /* Override timeless mode with user input from --itrace=Z */ + if (etm->synth_opts.timeless_decoding) + return true; + /* * Circle through the list of event and complain if we find one * with the time bit set. 
@@ -2810,6 +2823,14 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (err) goto err_free_etm; + if (session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); + etm->synth_opts.callchain = false; + } + etm->session = session; etm->machine = &session->machines.host; @@ -2854,14 +2875,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, return 0; } - if (session->itrace_synth_opts->set) { - etm->synth_opts = *session->itrace_synth_opts; - } else { - itrace_synth_opts__set_default(&etm->synth_opts, - session->itrace_synth_opts->default_no_sample); - etm->synth_opts.callchain = false; - } - err = cs_etm__synth_events(etm, session); if (err) goto err_delete_thread; diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 36428918411e..d65c7b19407d 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -171,8 +171,8 @@ struct cs_etm_packet_queue { u32 head; u32 tail; u32 instr_count; - u64 timestamp; - u64 next_timestamp; + u64 cs_timestamp; + u64 next_cs_timestamp; struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER]; }; diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 8fca4779ae6a..a9c102e8e3c0 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -240,11 +240,12 @@ static bool is_dir(struct perf_data *data) static int open_file_read(struct perf_data *data) { + int flags = data->in_place_update ? O_RDWR : O_RDONLY; struct stat st; int fd; char sbuf[STRERR_BUFSIZE]; - fd = open(data->file.path, O_RDONLY); + fd = open(data->file.path, flags); if (fd < 0) { int err = errno; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 62a3e66fbee8..c9de82af5584 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -31,6 +31,7 @@ struct perf_data { bool is_dir; bool force; bool use_stdio; + bool in_place_update; enum perf_data_mode mode; struct { diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 9130f6fad8d5..1bea5b29b12d 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -202,6 +202,18 @@ void perf_env__exit(struct perf_env *env) for (i = 0; i < env->nr_memory_nodes; i++) zfree(&env->memory_nodes[i].set); zfree(&env->memory_nodes); + + for (i = 0; i < env->nr_hybrid_nodes; i++) { + zfree(&env->hybrid_nodes[i].pmu_name); + zfree(&env->hybrid_nodes[i].cpus); + } + zfree(&env->hybrid_nodes); + + for (i = 0; i < env->nr_hybrid_cpc_nodes; i++) { + zfree(&env->hybrid_cpc_nodes[i].cpu_pmu_caps); + zfree(&env->hybrid_cpc_nodes[i].pmu_name); + } + zfree(&env->hybrid_cpc_nodes); } void perf_env__init(struct perf_env *env __maybe_unused) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index ca249bf5e984..6824a7423a2d 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -37,6 +37,18 @@ struct memory_node { unsigned long *set; }; +struct hybrid_node { + char *pmu_name; + char *cpus; +}; + +struct hybrid_cpc_node { + int nr_cpu_pmu_caps; + unsigned int max_branches; + char *cpu_pmu_caps; + char *pmu_name; +}; + struct perf_env { char *hostname; char *os_release; @@ -59,6 +71,8 @@ struct perf_env { int nr_pmu_mappings; int nr_groups; int nr_cpu_pmu_caps; + int nr_hybrid_nodes; + int nr_hybrid_cpc_nodes; char *cmdline; const char **cmdline_argv; char *sibling_cores; @@ -77,6 +91,8 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; + struct 
hybrid_node *hybrid_nodes; + struct hybrid_cpc_node *hybrid_cpc_nodes; #ifdef HAVE_LIBBPF_SUPPORT /* * bpf_info_lock protects bpf rbtrees. This is needed because the diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index aa1e42518d37..0158d2945bab 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -49,6 +49,7 @@ #include "cputopo.h" #include "bpf-event.h" #include "clockid.h" +#include "pmu-hybrid.h" #include <linux/ctype.h> #include <internal/lib.h> @@ -932,6 +933,40 @@ static int write_clock_data(struct feat_fd *ff, return do_write(ff, data64, sizeof(*data64)); } +static int write_hybrid_topology(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct hybrid_topology *tp; + int ret; + u32 i; + + tp = hybrid_topology__new(); + if (!tp) + return -ENOENT; + + ret = do_write(ff, &tp->nr, sizeof(u32)); + if (ret < 0) + goto err; + + for (i = 0; i < tp->nr; i++) { + struct hybrid_topology_node *n = &tp->nodes[i]; + + ret = do_write_string(ff, n->pmu_name); + if (ret < 0) + goto err; + + ret = do_write_string(ff, n->cpus); + if (ret < 0) + goto err; + } + + ret = 0; + +err: + hybrid_topology__delete(tp); + return ret; +} + static int write_dir_format(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { @@ -1425,18 +1460,14 @@ static int write_compressed(struct feat_fd *ff __maybe_unused, return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); } -static int write_cpu_pmu_caps(struct feat_fd *ff, - struct evlist *evlist __maybe_unused) +static int write_per_cpu_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu, + bool write_pmu) { - struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); struct perf_pmu_caps *caps = NULL; int nr_caps; int ret; - if (!cpu_pmu) - return -ENOENT; - - nr_caps = perf_pmu__caps_parse(cpu_pmu); + nr_caps = perf_pmu__caps_parse(pmu); if (nr_caps < 0) return nr_caps; @@ -1444,7 +1475,7 @@ static int write_cpu_pmu_caps(struct feat_fd *ff, if (ret < 0) return ret; - list_for_each_entry(caps, &cpu_pmu->caps, list) { + list_for_each_entry(caps, &pmu->caps, list) { ret = do_write_string(ff, caps->name); if (ret < 0) return ret; @@ -1454,9 +1485,49 @@ static int write_cpu_pmu_caps(struct feat_fd *ff, return ret; } + if (write_pmu) { + ret = do_write_string(ff, pmu->name); + if (ret < 0) + return ret; + } + return ret; } +static int write_cpu_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct perf_pmu *cpu_pmu = perf_pmu__find("cpu"); + + if (!cpu_pmu) + return -ENOENT; + + return write_per_cpu_pmu_caps(ff, cpu_pmu, false); +} + +static int write_hybrid_cpu_pmu_caps(struct feat_fd *ff, + struct evlist *evlist __maybe_unused) +{ + struct perf_pmu *pmu; + u32 nr_pmu = perf_pmu__hybrid_pmu_num(); + int ret; + + if (nr_pmu == 0) + return -ENOENT; + + ret = do_write(ff, &nr_pmu, sizeof(nr_pmu)); + if (ret < 0) + return ret; + + perf_pmu__for_each_hybrid_pmu(pmu) { + ret = write_per_cpu_pmu_caps(ff, pmu, true); + if (ret < 0) + return ret; + } + + return 0; +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1623,6 +1694,18 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp) clockid_name(clockid)); } +static void print_hybrid_topology(struct feat_fd *ff, FILE *fp) +{ + int i; + struct hybrid_node *n; + + fprintf(fp, "# hybrid cpu system:\n"); + for (i = 0; i < ff->ph->env.nr_hybrid_nodes; i++) { + n = &ff->ph->env.hybrid_nodes[i]; + fprintf(fp, "# %s cpu list : %s\n", n->pmu_name, n->cpus); + 
} +} + static void print_dir_format(struct feat_fd *ff, FILE *fp) { struct perf_session *session; @@ -1916,18 +1999,28 @@ static void print_compressed(struct feat_fd *ff, FILE *fp) ff->ph->env.comp_level, ff->ph->env.comp_ratio); } -static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char *cpu_pmu_caps, + char *pmu_name) { - const char *delimiter = "# cpu pmu capabilities: "; - u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps; - char *str; + const char *delimiter; + char *str, buf[128]; if (!nr_caps) { - fprintf(fp, "# cpu pmu capabilities: not available\n"); + if (!pmu_name) + fprintf(fp, "# cpu pmu capabilities: not available\n"); + else + fprintf(fp, "# %s pmu capabilities: not available\n", pmu_name); return; } - str = ff->ph->env.cpu_pmu_caps; + if (!pmu_name) + scnprintf(buf, sizeof(buf), "# cpu pmu capabilities: "); + else + scnprintf(buf, sizeof(buf), "# %s pmu capabilities: ", pmu_name); + + delimiter = buf; + + str = cpu_pmu_caps; while (nr_caps--) { fprintf(fp, "%s%s", delimiter, str); delimiter = ", "; @@ -1937,6 +2030,24 @@ static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) fprintf(fp, "\n"); } +static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +{ + print_per_cpu_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps, + ff->ph->env.cpu_pmu_caps, NULL); +} + +static void print_hybrid_cpu_pmu_caps(struct feat_fd *ff, FILE *fp) +{ + struct hybrid_cpc_node *n; + + for (int i = 0; i < ff->ph->env.nr_hybrid_cpc_nodes; i++) { + n = &ff->ph->env.hybrid_cpc_nodes[i]; + print_per_cpu_pmu_caps(fp, n->nr_cpu_pmu_caps, + n->cpu_pmu_caps, + n->pmu_name); + } +} + static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; @@ -2849,6 +2960,46 @@ static int process_clock_data(struct feat_fd *ff, return 0; } +static int process_hybrid_topology(struct feat_fd *ff, + void *data __maybe_unused) +{ + struct hybrid_node *nodes, *n; + u32 nr, i; + + /* nr nodes */ + if (do_read_u32(ff, &nr)) + return -1; + + nodes = zalloc(sizeof(*nodes) * nr); + if (!nodes) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + n = &nodes[i]; + + n->pmu_name = do_read_string(ff); + if (!n->pmu_name) + goto error; + + n->cpus = do_read_string(ff); + if (!n->cpus) + goto error; + } + + ff->ph->env.nr_hybrid_nodes = nr; + ff->ph->env.hybrid_nodes = nodes; + return 0; + +error: + for (i = 0; i < nr; i++) { + free(nodes[i].pmu_name); + free(nodes[i].cpus); + } + + free(nodes); + return -1; +} + static int process_dir_format(struct feat_fd *ff, void *_data __maybe_unused) { @@ -3002,8 +3153,9 @@ static int process_compressed(struct feat_fd *ff, return 0; } -static int process_cpu_pmu_caps(struct feat_fd *ff, - void *data __maybe_unused) +static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps, + char **cpu_pmu_caps, + unsigned int *max_branches) { char *name, *value; struct strbuf sb; @@ -3017,7 +3169,7 @@ static int process_cpu_pmu_caps(struct feat_fd *ff, return 0; } - ff->ph->env.nr_cpu_pmu_caps = nr_caps; + *nr_cpu_pmu_caps = nr_caps; if (strbuf_init(&sb, 128) < 0) return -1; @@ -3039,12 +3191,12 @@ static int process_cpu_pmu_caps(struct feat_fd *ff, goto free_value; if (!strcmp(name, "branches")) - ff->ph->env.max_branches = atoi(value); + *max_branches = atoi(value); free(value); free(name); } - ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL); + *cpu_pmu_caps = strbuf_detach(&sb, NULL); return 0; free_value: @@ -3056,6 +3208,63 @@ error: return -1; } +static int 
process_cpu_pmu_caps(struct feat_fd *ff, + void *data __maybe_unused) +{ + return process_per_cpu_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps, + &ff->ph->env.cpu_pmu_caps, + &ff->ph->env.max_branches); +} + +static int process_hybrid_cpu_pmu_caps(struct feat_fd *ff, + void *data __maybe_unused) +{ + struct hybrid_cpc_node *nodes; + u32 nr_pmu, i; + int ret; + + if (do_read_u32(ff, &nr_pmu)) + return -1; + + if (!nr_pmu) { + pr_debug("hybrid cpu pmu capabilities not available\n"); + return 0; + } + + nodes = zalloc(sizeof(*nodes) * nr_pmu); + if (!nodes) + return -ENOMEM; + + for (i = 0; i < nr_pmu; i++) { + struct hybrid_cpc_node *n = &nodes[i]; + + ret = process_per_cpu_pmu_caps(ff, &n->nr_cpu_pmu_caps, + &n->cpu_pmu_caps, + &n->max_branches); + if (ret) + goto err; + + n->pmu_name = do_read_string(ff); + if (!n->pmu_name) { + ret = -1; + goto err; + } + } + + ff->ph->env.nr_hybrid_cpc_nodes = nr_pmu; + ff->ph->env.hybrid_cpc_nodes = nodes; + return 0; + +err: + for (i = 0; i < nr_pmu; i++) { + free(nodes[i].cpu_pmu_caps); + free(nodes[i].pmu_name); + } + + free(nodes); + return ret; +} + #define FEAT_OPR(n, func, __full_only) \ [HEADER_##n] = { \ .name = __stringify(n), \ @@ -3117,6 +3326,8 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(COMPRESSED, compressed, false), FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false), FEAT_OPR(CLOCK_DATA, clock_data, false), + FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true), + FEAT_OPR(HYBRID_CPU_PMU_CAPS, hybrid_cpu_pmu_caps, false), }; struct header_print_data { @@ -3814,6 +4025,11 @@ int perf_session__read_header(struct perf_session *session) if (perf_file_header__read(&f_header, header, fd) < 0) return -EINVAL; + if (header->needs_swap && data->in_place_update) { + pr_err("In-place update not supported when byte-swapping is required\n"); + return -EINVAL; + } + /* * Sanity check that perf.data was written cleanly; data size is * initialized to 0 and updated only if the on_exit function is run. diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2aca71763ecf..ae6b1cf19a7d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -45,6 +45,8 @@ enum { HEADER_COMPRESSED, HEADER_CPU_PMU_CAPS, HEADER_CLOCK_DATA, + HEADER_HYBRID_TOPOLOGY, + HEADER_HYBRID_CPU_PMU_CAPS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 20ad663978cc..cb2520abf261 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -35,6 +35,10 @@ #define BIT63 (((uint64_t)1 << 63)) +#define SEVEN_BYTES 0xffffffffffffffULL + +#define NO_VMCS 0xffffffffffULL + #define INTEL_PT_RETURN 1 /* Maximum number of loops with no packets consumed i.e. 
stuck in a loop */ @@ -51,6 +55,11 @@ struct intel_pt_stack { int pos; }; +enum intel_pt_p_once { + INTEL_PT_PRT_ONCE_UNK_VMCS, + INTEL_PT_PRT_ONCE_ERANGE, +}; + enum intel_pt_pkt_state { INTEL_PT_STATE_NO_PSB, INTEL_PT_STATE_NO_IP, @@ -64,6 +73,7 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, INTEL_PT_STATE_FUP_IN_PSB, INTEL_PT_STATE_RESAMPLE, + INTEL_PT_STATE_VM_TIME_CORRELATION, }; static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) @@ -75,6 +85,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) case INTEL_PT_STATE_IN_SYNC: case INTEL_PT_STATE_TNT_CONT: case INTEL_PT_STATE_RESAMPLE: + case INTEL_PT_STATE_VM_TIME_CORRELATION: return true; case INTEL_PT_STATE_TNT: case INTEL_PT_STATE_TIP: @@ -107,6 +118,7 @@ struct intel_pt_decoder { uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); + struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs); void *data; struct intel_pt_state state; const unsigned char *buf; @@ -122,6 +134,11 @@ struct intel_pt_decoder { bool in_psb; bool hop; bool leap; + bool vm_time_correlation; + bool vm_tm_corr_dry_run; + bool vm_tm_corr_reliable; + bool vm_tm_corr_same_buf; + bool vm_tm_corr_continuous; bool nr; bool next_nr; enum intel_pt_param_flags flags; @@ -139,6 +156,11 @@ struct intel_pt_decoder { uint64_t ctc_delta; uint64_t cycle_cnt; uint64_t cyc_ref_timestamp; + uint64_t first_timestamp; + uint64_t last_reliable_timestamp; + uint64_t vmcs; + uint64_t print_once; + uint64_t last_ctc; uint32_t last_mtc; uint32_t tsc_ctc_ratio_n; uint32_t tsc_ctc_ratio_d; @@ -217,6 +239,31 @@ static uint64_t intel_pt_lower_power_of_2(uint64_t x) return x << i; } +__printf(1, 2) +static void p_log(const char *fmt, ...) 
+{ + char buf[512]; + va_list args; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + + fprintf(stderr, "%s\n", buf); + intel_pt_log("%s\n", buf); +} + +static bool intel_pt_print_once(struct intel_pt_decoder *decoder, + enum intel_pt_p_once id) +{ + uint64_t bit = 1ULL << id; + + if (decoder->print_once & bit) + return false; + decoder->print_once |= bit; + return true; +} + static uint64_t intel_pt_cyc_threshold(uint64_t ctl) { if (!(ctl & INTEL_PT_CYC_ENABLE)) @@ -258,11 +305,16 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->walk_insn = params->walk_insn; decoder->pgd_ip = params->pgd_ip; decoder->lookahead = params->lookahead; + decoder->findnew_vmcs_info = params->findnew_vmcs_info; decoder->data = params->data; decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; decoder->hop = params->quick >= 1; decoder->leap = params->quick >= 2; + decoder->vm_time_correlation = params->vm_time_correlation; + decoder->vm_tm_corr_dry_run = params->vm_tm_corr_dry_run; + decoder->first_timestamp = params->first_timestamp; + decoder->last_reliable_timestamp = params->first_timestamp; decoder->flags = params->flags; @@ -312,6 +364,12 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) return decoder; } +void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder, + uint64_t first_timestamp) +{ + decoder->first_timestamp = first_timestamp; +} + static void intel_pt_pop_blk(struct intel_pt_stack *stack) { struct intel_pt_blk *blk = stack->blk; @@ -577,6 +635,7 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition) intel_pt_reposition(decoder); decoder->ref_timestamp = buffer.ref_timestamp; decoder->state.trace_nr = buffer.trace_nr; + decoder->vm_tm_corr_same_buf = false; intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", decoder->ref_timestamp); return -ENOLINK; @@ -1469,9 +1528,24 @@ static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp) return timestamp; } +/* For use only when decoder->vm_time_correlation is true */ +static bool intel_pt_time_in_range(struct intel_pt_decoder *decoder, + uint64_t timestamp) +{ + uint64_t max_timestamp = decoder->buf_timestamp; + + if (!max_timestamp) { + max_timestamp = decoder->last_reliable_timestamp + + 0x400000000ULL; + } + return timestamp >= decoder->last_reliable_timestamp && + timestamp < decoder->buf_timestamp; +} + static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) { uint64_t timestamp; + bool bad = false; decoder->have_tma = false; @@ -1493,10 +1567,21 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) timestamp = decoder->timestamp; } if (timestamp < decoder->timestamp) { - intel_pt_log_to("Wraparound timestamp", timestamp); - timestamp += (1ULL << 56); - decoder->tsc_timestamp = timestamp; + if (!decoder->buf_timestamp || + (timestamp + (1ULL << 56) < decoder->buf_timestamp)) { + intel_pt_log_to("Wraparound timestamp", timestamp); + timestamp += (1ULL << 56); + decoder->tsc_timestamp = timestamp; + } else { + intel_pt_log_to("Suppressing bad timestamp", timestamp); + timestamp = decoder->timestamp; + bad = true; + } } + if (decoder->vm_time_correlation && + (bad || !intel_pt_time_in_range(decoder, timestamp)) && + intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE)) + p_log("Timestamp out of range"); decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; } @@ -1573,6 +1658,7 @@ 
static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) intel_pt_mtc_cyc_cnt_upd(decoder); decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; + decoder->last_ctc = ctc - ctc_rem; decoder->ctc_timestamp = decoder->tsc_timestamp - fc; if (decoder->tsc_ctc_mult) { decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; @@ -1957,6 +2043,613 @@ static int intel_pt_resample(struct intel_pt_decoder *decoder) return 0; } +struct intel_pt_vm_tsc_info { + struct intel_pt_pkt pip_packet; + struct intel_pt_pkt vmcs_packet; + struct intel_pt_pkt tma_packet; + bool tsc, pip, vmcs, tma, psbend; + uint64_t ctc_delta; + uint64_t last_ctc; + int max_lookahead; +}; + +/* Lookahead and get the PIP, VMCS and TMA packets from PSB+ */ +static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_vm_tsc_info *data = pkt_info->data; + + switch (pkt_info->packet.type) { + case INTEL_PT_PAD: + case INTEL_PT_MNT: + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_MTC: + case INTEL_PT_FUP: + case INTEL_PT_CYC: + case INTEL_PT_CBR: + break; + + case INTEL_PT_TSC: + data->tsc = true; + break; + + case INTEL_PT_TMA: + data->tma_packet = pkt_info->packet; + data->tma = true; + break; + + case INTEL_PT_PIP: + data->pip_packet = pkt_info->packet; + data->pip = true; + break; + + case INTEL_PT_VMCS: + data->vmcs_packet = pkt_info->packet; + data->vmcs = true; + break; + + case INTEL_PT_PSBEND: + data->psbend = true; + return 1; + + case INTEL_PT_TIP_PGE: + case INTEL_PT_PTWRITE: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_PWRX: + case INTEL_PT_BBP: + case INTEL_PT_BIP: + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: + case INTEL_PT_OVF: + case INTEL_PT_BAD: + case INTEL_PT_TNT: + case INTEL_PT_TIP_PGD: + case INTEL_PT_TIP: + case INTEL_PT_PSB: + case INTEL_PT_TRACESTOP: + default: + return 1; + } + + return 0; +} + +struct intel_pt_ovf_fup_info { + int max_lookahead; + bool found; +}; + +/* Lookahead to detect a FUP packet after OVF */ +static int intel_pt_ovf_fup_lookahead_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_ovf_fup_info *data = pkt_info->data; + + if (pkt_info->packet.type == INTEL_PT_CYC || + pkt_info->packet.type == INTEL_PT_MTC || + pkt_info->packet.type == INTEL_PT_TSC) + return !--(data->max_lookahead); + data->found = pkt_info->packet.type == INTEL_PT_FUP; + return 1; +} + +static bool intel_pt_ovf_fup_lookahead(struct intel_pt_decoder *decoder) +{ + struct intel_pt_ovf_fup_info data = { + .max_lookahead = 16, + .found = false, + }; + + intel_pt_pkt_lookahead(decoder, intel_pt_ovf_fup_lookahead_cb, &data); + return data.found; +} + +/* Lookahead and get the TMA packet after TSC */ +static int intel_pt_tma_lookahead_cb(struct intel_pt_pkt_info *pkt_info) +{ + struct intel_pt_vm_tsc_info *data = pkt_info->data; + + if (pkt_info->packet.type == INTEL_PT_CYC || + pkt_info->packet.type == INTEL_PT_MTC) + return !--(data->max_lookahead); + + if (pkt_info->packet.type == INTEL_PT_TMA) { + data->tma_packet = pkt_info->packet; + data->tma = true; + } + return 1; +} + +static uint64_t intel_pt_ctc_to_tsc(struct intel_pt_decoder *decoder, uint64_t ctc) +{ + if (decoder->tsc_ctc_mult) + return ctc * decoder->tsc_ctc_mult; + else + return multdiv(ctc, decoder->tsc_ctc_ratio_n, decoder->tsc_ctc_ratio_d); +} + +static uint64_t intel_pt_calc_expected_tsc(struct intel_pt_decoder *decoder, + uint32_t ctc, + uint32_t fc, + uint64_t last_ctc_timestamp, 
+ uint64_t ctc_delta, + uint32_t last_ctc) +{ + /* Number of CTC ticks from last_ctc_timestamp to last_mtc */ + uint64_t last_mtc_ctc = last_ctc + ctc_delta; + /* + * Number of CTC ticks from there until current TMA packet. We would + * expect last_mtc_ctc to be before ctc, but the TSC packet can slip + * past an MTC, so a sign-extended value is used. + */ + uint64_t delta = (int16_t)((uint16_t)ctc - (uint16_t)last_mtc_ctc); + /* Total CTC ticks from last_ctc_timestamp to current TMA packet */ + uint64_t new_ctc_delta = ctc_delta + delta; + uint64_t expected_tsc; + + /* + * Convert CTC ticks to TSC ticks, add the starting point + * (last_ctc_timestamp) and the fast counter from the TMA packet. + */ + expected_tsc = last_ctc_timestamp + intel_pt_ctc_to_tsc(decoder, new_ctc_delta) + fc; + + if (intel_pt_enable_logging) { + intel_pt_log_x64(last_mtc_ctc); + intel_pt_log_x32(last_ctc); + intel_pt_log_x64(ctc_delta); + intel_pt_log_x64(delta); + intel_pt_log_x32(ctc); + intel_pt_log_x64(new_ctc_delta); + intel_pt_log_x64(last_ctc_timestamp); + intel_pt_log_x32(fc); + intel_pt_log_x64(intel_pt_ctc_to_tsc(decoder, new_ctc_delta)); + intel_pt_log_x64(expected_tsc); + } + + return expected_tsc; +} + +static uint64_t intel_pt_expected_tsc(struct intel_pt_decoder *decoder, + struct intel_pt_vm_tsc_info *data) +{ + uint32_t ctc = data->tma_packet.payload; + uint32_t fc = data->tma_packet.count; + + return intel_pt_calc_expected_tsc(decoder, ctc, fc, + decoder->ctc_timestamp, + data->ctc_delta, data->last_ctc); +} + +static void intel_pt_translate_vm_tsc(struct intel_pt_decoder *decoder, + struct intel_pt_vmcs_info *vmcs_info) +{ + uint64_t payload = decoder->packet.payload; + + /* VMX adds the TSC Offset, so subtract to get host TSC */ + decoder->packet.payload -= vmcs_info->tsc_offset; + /* TSC packet has only 7 bytes */ + decoder->packet.payload &= SEVEN_BYTES; + + /* + * The buffer is mmapped from the data file, so this also updates the + * data file. 
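+ * A TSC packet is one header byte followed by a 7-byte payload, hence + * the copy of 7 bytes to decoder->buf + 1 below. In a dry run the + * translated value is still computed but nothing is written back.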
+ */ + if (!decoder->vm_tm_corr_dry_run) + memcpy((void *)decoder->buf + 1, &decoder->packet.payload, 7); + + intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64 + " VMCS %#" PRIx64 " TSC Offset %#" PRIx64 "\n", + payload, decoder->packet.payload, vmcs_info->vmcs, + vmcs_info->tsc_offset); +} + +static void intel_pt_translate_vm_tsc_offset(struct intel_pt_decoder *decoder, + uint64_t tsc_offset) +{ + struct intel_pt_vmcs_info vmcs_info = { + .vmcs = NO_VMCS, + .tsc_offset = tsc_offset + }; + + intel_pt_translate_vm_tsc(decoder, &vmcs_info); +} + +static inline bool in_vm(uint64_t pip_payload) +{ + return pip_payload & 1; +} + +static inline bool pip_in_vm(struct intel_pt_pkt *pip_packet) +{ + return pip_packet->payload & 1; +} + +static void intel_pt_print_vmcs_info(struct intel_pt_vmcs_info *vmcs_info) +{ + p_log("VMCS: %#" PRIx64 " TSC Offset %#" PRIx64, + vmcs_info->vmcs, vmcs_info->tsc_offset); +} + +static void intel_pt_vm_tm_corr_psb(struct intel_pt_decoder *decoder, + struct intel_pt_vm_tsc_info *data) +{ + memset(data, 0, sizeof(*data)); + data->ctc_delta = decoder->ctc_delta; + data->last_ctc = decoder->last_ctc; + intel_pt_pkt_lookahead(decoder, intel_pt_vm_psb_lookahead_cb, data); + if (data->tsc && !data->psbend) + p_log("ERROR: PSB without PSBEND"); + decoder->in_psb = data->psbend; +} + +static void intel_pt_vm_tm_corr_first_tsc(struct intel_pt_decoder *decoder, + struct intel_pt_vm_tsc_info *data, + struct intel_pt_vmcs_info *vmcs_info, + uint64_t host_tsc) +{ + if (!decoder->in_psb) { + /* Can't happen */ + p_log("ERROR: First TSC is not in PSB+"); + } + + if (data->pip) { + if (pip_in_vm(&data->pip_packet)) { /* Guest */ + if (vmcs_info && vmcs_info->tsc_offset) { + intel_pt_translate_vm_tsc(decoder, vmcs_info); + decoder->vm_tm_corr_reliable = true; + } else { + p_log("ERROR: First TSC, unknown TSC Offset"); + } + } else { /* Host */ + decoder->vm_tm_corr_reliable = true; + } + } else { /* Host or Guest */ + decoder->vm_tm_corr_reliable = false; + if (intel_pt_time_in_range(decoder, host_tsc)) { + /* Assume Host */ + } else { + /* Assume Guest */ + if (vmcs_info && vmcs_info->tsc_offset) + intel_pt_translate_vm_tsc(decoder, vmcs_info); + else + p_log("ERROR: First TSC, no PIP, unknown TSC Offset"); + } + } +} + +static void intel_pt_vm_tm_corr_tsc(struct intel_pt_decoder *decoder, + struct intel_pt_vm_tsc_info *data) +{ + struct intel_pt_vmcs_info *vmcs_info; + uint64_t tsc_offset = 0; + uint64_t vmcs; + bool reliable = true; + uint64_t expected_tsc; + uint64_t host_tsc; + uint64_t ref_timestamp; + + bool assign = false; + bool assign_reliable = false; + + /* Already have 'data' for the in_psb case */ + if (!decoder->in_psb) { + memset(data, 0, sizeof(*data)); + data->ctc_delta = decoder->ctc_delta; + data->last_ctc = decoder->last_ctc; + data->max_lookahead = 16; + intel_pt_pkt_lookahead(decoder, intel_pt_tma_lookahead_cb, data); + if (decoder->pge) { + data->pip = true; + data->pip_packet.payload = decoder->pip_payload; + } + } + + /* Calculations depend on having TMA packets */ + if (!data->tma) { + p_log("ERROR: TSC without TMA"); + return; + } + + vmcs = data->vmcs ? data->vmcs_packet.payload : decoder->vmcs; + if (vmcs == NO_VMCS) + vmcs = 0; + + vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs); + + ref_timestamp = decoder->timestamp ? 
decoder->timestamp : decoder->buf_timestamp; + host_tsc = intel_pt_8b_tsc(decoder->packet.payload, ref_timestamp); + + if (!decoder->ctc_timestamp) { + intel_pt_vm_tm_corr_first_tsc(decoder, data, vmcs_info, host_tsc); + return; + } + + expected_tsc = intel_pt_expected_tsc(decoder, data); + + tsc_offset = host_tsc - expected_tsc; + + /* Determine if TSC is from Host or Guest */ + if (data->pip) { + if (pip_in_vm(&data->pip_packet)) { /* Guest */ + if (!vmcs_info) { + /* PIP NR=1 without VMCS cannot happen */ + p_log("ERROR: Missing VMCS"); + intel_pt_translate_vm_tsc_offset(decoder, tsc_offset); + decoder->vm_tm_corr_reliable = false; + return; + } + } else { /* Host */ + decoder->last_reliable_timestamp = host_tsc; + decoder->vm_tm_corr_reliable = true; + return; + } + } else { /* Host or Guest */ + reliable = false; /* Host/Guest is a guess, so not reliable */ + if (decoder->in_psb) { + if (!tsc_offset) + return; /* Zero TSC Offset, assume Host */ + /* + * TSC packet has only 7 bytes of TSC. We have no + * information about the Guest's 8th byte, but it + * doesn't matter because we only need 7 bytes. + * Here, since the 8th byte is unreliable and + * irrelevant, compare only 7 bytes. + */ + if (vmcs_info && + (tsc_offset & SEVEN_BYTES) == + (vmcs_info->tsc_offset & SEVEN_BYTES)) { + /* Same TSC Offset as last VMCS, assume Guest */ + goto guest; + } + } + /* + * Check if the host_tsc is within the expected range. + * Note, we could narrow the range more by looking ahead for + * the next host TSC in the same buffer, but we don't bother to + * do that because this is probably good enough. + */ + if (host_tsc >= expected_tsc && intel_pt_time_in_range(decoder, host_tsc)) { + /* Within expected range for Host TSC, assume Host */ + decoder->vm_tm_corr_reliable = false; + return; + } + } + +guest: /* Assuming Guest */ + + /* Determine whether to assign TSC Offset */ + if (vmcs_info && vmcs_info->vmcs) { + if (vmcs_info->tsc_offset && vmcs_info->reliable) { + assign = false; + } else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_reliable && + decoder->vm_tm_corr_continuous && decoder->vm_tm_corr_same_buf) { + /* Continuous tracing, TSC in a PSB is not a time loss */ + assign = true; + assign_reliable = true; + } else if (decoder->in_psb && data->pip && decoder->vm_tm_corr_same_buf) { + /* + * Unlikely to be a time loss TSC in a PSB which is not + * at the start of a buffer.
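+ * Record the offset anyway, but leave it marked unreliable so + * that a later, reliable determination can overwrite it.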
+ */ + assign = true; + assign_reliable = false; + } + } + + /* Record VMCS TSC Offset */ + if (assign && (vmcs_info->tsc_offset != tsc_offset || + vmcs_info->reliable != assign_reliable)) { + bool print = vmcs_info->tsc_offset != tsc_offset; + + vmcs_info->tsc_offset = tsc_offset; + vmcs_info->reliable = assign_reliable; + if (print) + intel_pt_print_vmcs_info(vmcs_info); + } + + /* Determine what TSC Offset to use */ + if (vmcs_info && vmcs_info->tsc_offset) { + if (!vmcs_info->reliable) + reliable = false; + intel_pt_translate_vm_tsc(decoder, vmcs_info); + } else { + reliable = false; + if (vmcs_info) { + if (!vmcs_info->error_printed) { + p_log("ERROR: Unknown TSC Offset for VMCS %#" PRIx64, + vmcs_info->vmcs); + vmcs_info->error_printed = true; + } + } else { + if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS)) + p_log("ERROR: Unknown VMCS"); + } + intel_pt_translate_vm_tsc_offset(decoder, tsc_offset); + } + + decoder->vm_tm_corr_reliable = reliable; +} + +static void intel_pt_vm_tm_corr_pebs_tsc(struct intel_pt_decoder *decoder) +{ + uint64_t host_tsc = decoder->packet.payload; + uint64_t guest_tsc = decoder->packet.payload; + struct intel_pt_vmcs_info *vmcs_info; + uint64_t vmcs; + + vmcs = decoder->vmcs; + if (vmcs == NO_VMCS) + vmcs = 0; + + vmcs_info = decoder->findnew_vmcs_info(decoder->data, vmcs); + + if (decoder->pge) { + if (in_vm(decoder->pip_payload)) { /* Guest */ + if (!vmcs_info) { + /* PIP NR=1 without VMCS cannot happen */ + p_log("ERROR: Missing VMCS"); + } + } else { /* Host */ + return; + } + } else { /* Host or Guest */ + if (intel_pt_time_in_range(decoder, host_tsc)) { + /* Within expected range for Host TSC, assume Host */ + return; + } + } + + if (vmcs_info) { + /* Translate Guest TSC to Host TSC */ + host_tsc = ((guest_tsc & SEVEN_BYTES) - vmcs_info->tsc_offset) & SEVEN_BYTES; + host_tsc = intel_pt_8b_tsc(host_tsc, decoder->timestamp); + intel_pt_log("Translated VM TSC %#" PRIx64 " -> %#" PRIx64 + " VMCS %#" PRIx64 " TSC Offset %#" PRIx64 "\n", + guest_tsc, host_tsc, vmcs_info->vmcs, + vmcs_info->tsc_offset); + if (!intel_pt_time_in_range(decoder, host_tsc) && + intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_ERANGE)) + p_log("Timestamp out of range"); + } else { + if (intel_pt_print_once(decoder, INTEL_PT_PRT_ONCE_UNK_VMCS)) + p_log("ERROR: Unknown VMCS"); + host_tsc = decoder->timestamp; + } + + decoder->packet.payload = host_tsc; + + if (!decoder->vm_tm_corr_dry_run) + memcpy((void *)decoder->buf + 1, &host_tsc, 8); +} + +static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder) +{ + struct intel_pt_vm_tsc_info data = { .psbend = false }; + bool pge; + int err; + + if (decoder->in_psb) + intel_pt_vm_tm_corr_psb(decoder, &data); + + while (1) { + err = intel_pt_get_next_packet(decoder); + if (err == -ENOLINK) + continue; + if (err) + break; + + switch (decoder->packet.type) { + case INTEL_PT_TIP_PGD: + decoder->pge = false; + decoder->vm_tm_corr_continuous = false; + break; + + case INTEL_PT_TNT: + case INTEL_PT_TIP: + case INTEL_PT_TIP_PGE: + decoder->pge = true; + break; + + case INTEL_PT_OVF: + decoder->in_psb = false; + pge = decoder->pge; + decoder->pge = intel_pt_ovf_fup_lookahead(decoder); + if (pge != decoder->pge) + intel_pt_log("Surprising PGE change in OVF!"); + if (!decoder->pge) + decoder->vm_tm_corr_continuous = false; + break; + + case INTEL_PT_FUP: + if (decoder->in_psb) + decoder->pge = true; + break; + + case INTEL_PT_TRACESTOP: + decoder->pge = false; + decoder->vm_tm_corr_continuous = false; + decoder->have_tma = 
false; + break; + + case INTEL_PT_PSB: + intel_pt_vm_tm_corr_psb(decoder, &data); + break; + + case INTEL_PT_PIP: + decoder->pip_payload = decoder->packet.payload; + break; + + case INTEL_PT_MTC: + intel_pt_calc_mtc_timestamp(decoder); + break; + + case INTEL_PT_TSC: + intel_pt_vm_tm_corr_tsc(decoder, &data); + intel_pt_calc_tsc_timestamp(decoder); + decoder->vm_tm_corr_same_buf = true; + decoder->vm_tm_corr_continuous = decoder->pge; + break; + + case INTEL_PT_TMA: + intel_pt_calc_tma(decoder); + break; + + case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + + case INTEL_PT_PSBEND: + decoder->in_psb = false; + data.psbend = false; + break; + + case INTEL_PT_VMCS: + if (decoder->packet.payload != NO_VMCS) + decoder->vmcs = decoder->packet.payload; + break; + + case INTEL_PT_BBP: + decoder->blk_type = decoder->packet.payload; + break; + + case INTEL_PT_BIP: + if (decoder->blk_type == INTEL_PT_PEBS_BASIC && + decoder->packet.count == 2) + intel_pt_vm_tm_corr_pebs_tsc(decoder); + break; + + case INTEL_PT_BEP: + case INTEL_PT_BEP_IP: + decoder->blk_type = 0; + break; + + case INTEL_PT_MODE_EXEC: + case INTEL_PT_MODE_TSX: + case INTEL_PT_MNT: + case INTEL_PT_PAD: + case INTEL_PT_PTWRITE_IP: + case INTEL_PT_PTWRITE: + case INTEL_PT_MWAIT: + case INTEL_PT_PWRE: + case INTEL_PT_EXSTOP_IP: + case INTEL_PT_EXSTOP: + case INTEL_PT_PWRX: + case INTEL_PT_BAD: /* Does not happen */ + default: + break; + } + } + + return err; +} + #define HOP_PROCESS 0 #define HOP_IGNORE 1 #define HOP_RETURN 2 @@ -2898,6 +3591,15 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder) if (err) return err; + if (decoder->vm_time_correlation) { + decoder->in_psb = true; + if (!decoder->timestamp) + decoder->timestamp = 1; + decoder->state.type = 0; + decoder->pkt_state = INTEL_PT_STATE_VM_TIME_CORRELATION; + return 0; + } + decoder->have_last_ip = true; decoder->pkt_state = INTEL_PT_STATE_NO_IP; @@ -2985,6 +3687,9 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) case INTEL_PT_STATE_RESAMPLE: err = intel_pt_resample(decoder); break; + case INTEL_PT_STATE_VM_TIME_CORRELATION: + err = intel_pt_vm_time_correlation(decoder); + break; default: err = intel_pt_bug(decoder); break; @@ -3231,6 +3936,7 @@ static unsigned char *adj_for_padding(unsigned char *buf_b, * @len_b: size of second buffer * @consecutive: returns true if there is data in buf_b that is consecutive * to buf_a + * @ooo_tsc: out-of-order TSC due to VM TSC offset / scaling * * If the trace contains TSC we can look at the last TSC of @buf_a and the * first TSC of @buf_b in order to determine if the buffers overlap, and then @@ -3243,7 +3949,8 @@ static unsigned char *adj_for_padding(unsigned char *buf_b, static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, - size_t len_b, bool *consecutive) + size_t len_b, bool *consecutive, + bool ooo_tsc) { uint64_t tsc_a, tsc_b; unsigned char *p; @@ -3278,7 +3985,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, start = buf_b + len_b - (rem_b - rem_a); return adj_for_padding(start, buf_a, len_a); } - if (cmp < 0) + if (cmp < 0 && !ooo_tsc) return buf_b; /* tsc_a < tsc_b => no overlap */ } @@ -3296,6 +4003,7 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, * @have_tsc: can use TSC packets to detect overlap * @consecutive: returns true if there is data in buf_b that is consecutive * to buf_a + * @ooo_tsc: 
out-of-order TSC due to VM TSC offset / scaling * * When trace samples or snapshots are recorded there is the possibility that * the data overlaps. Note that, for the purposes of decoding, data is only @@ -3306,7 +4014,8 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, */ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc, bool *consecutive) + bool have_tsc, bool *consecutive, + bool ooo_tsc) { unsigned char *found; @@ -3319,7 +4028,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, if (have_tsc) { found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b, - consecutive); + consecutive, ooo_tsc); if (found) return found; } diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index d9e62a7f6f0e..714c475808c0 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -11,6 +11,8 @@ #include <stddef.h> #include <stdbool.h> +#include <linux/rbtree.h> + #include "intel-pt-insn-decoder.h" #define INTEL_PT_IN_TX (1 << 0) @@ -199,6 +201,14 @@ struct intel_pt_blk_items { bool is_32_bit; }; +struct intel_pt_vmcs_info { + struct rb_node rb_node; + uint64_t vmcs; + uint64_t tsc_offset; + bool reliable; + bool error_printed; +}; + struct intel_pt_state { enum intel_pt_sample_type type; bool from_nr; @@ -244,9 +254,13 @@ struct intel_pt_params { uint64_t max_insn_cnt, void *data); bool (*pgd_ip)(uint64_t ip, void *data); int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data); + struct intel_pt_vmcs_info *(*findnew_vmcs_info)(void *data, uint64_t vmcs); void *data; bool return_compression; bool branch_enable; + bool vm_time_correlation; + bool vm_tm_corr_dry_run; + uint64_t first_timestamp; uint64_t ctl; uint64_t period; enum intel_pt_period_type period_type; @@ -269,8 +283,12 @@ int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp); unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, unsigned char *buf_b, size_t len_b, - bool have_tsc, bool *consecutive); + bool have_tsc, bool *consecutive, + bool ooo_tsc); int intel_pt__strerror(int code, char *buf, size_t buflen); +void intel_pt_set_first_timestamp(struct intel_pt_decoder *decoder, + uint64_t first_timestamp); + #endif diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index 388661f89c44..d900aab24b21 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -67,4 +67,9 @@ static inline void intel_pt_log_to(const char *msg, uint64_t u) intel_pt_log("%s to " x64_fmt "\n", msg, u); } +#define intel_pt_log_var(var, fmt) intel_pt_log("%s: " #var " " fmt "\n", __func__, var) + +#define intel_pt_log_x32(var) intel_pt_log_var(var, "%#x") +#define intel_pt_log_x64(var) intel_pt_log_var(var, "%#" PRIx64) + #endif diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 0dfec8761b9a..154a1077f22e 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -78,6 +78,7 @@ struct intel_pt { u64 kernel_start; u64 switch_ip; u64 ptss_ip; + u64 first_timestamp; struct perf_tsc_conversion tc; bool cap_user_time_zero; @@ -133,6 +134,9 @@ struct intel_pt { struct ip_callchain *chain; struct branch_stack *br_stack; + + u64 dflt_tsc_offset; + struct rb_root vmcs_info; }; enum switch_state { @@ -271,6 
+275,65 @@ static bool intel_pt_log_events(struct intel_pt *pt, u64 tm) return !n || !perf_time__ranges_skip_sample(range, n, tm); } +static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs(struct rb_root *rb_root, + u64 vmcs, + u64 dflt_tsc_offset) +{ + struct rb_node **p = &rb_root->rb_node; + struct rb_node *parent = NULL; + struct intel_pt_vmcs_info *v; + + while (*p) { + parent = *p; + v = rb_entry(parent, struct intel_pt_vmcs_info, rb_node); + + if (v->vmcs == vmcs) + return v; + + if (vmcs < v->vmcs) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + v = zalloc(sizeof(*v)); + if (v) { + v->vmcs = vmcs; + v->tsc_offset = dflt_tsc_offset; + v->reliable = dflt_tsc_offset; + + rb_link_node(&v->rb_node, parent, p); + rb_insert_color(&v->rb_node, rb_root); + } + + return v; +} + +static struct intel_pt_vmcs_info *intel_pt_findnew_vmcs_info(void *data, uint64_t vmcs) +{ + struct intel_pt_queue *ptq = data; + struct intel_pt *pt = ptq->pt; + + if (!vmcs && !pt->dflt_tsc_offset) + return NULL; + + return intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, pt->dflt_tsc_offset); +} + +static void intel_pt_free_vmcs_info(struct intel_pt *pt) +{ + struct intel_pt_vmcs_info *v; + struct rb_node *n; + + n = rb_first(&pt->vmcs_info); + while (n) { + v = rb_entry(n, struct intel_pt_vmcs_info, rb_node); + n = rb_next(n); + rb_erase(&v->rb_node, &pt->vmcs_info); + free(v); + } +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -278,9 +341,17 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer * void *start; start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, - pt->have_tsc, &consecutive); + pt->have_tsc, &consecutive, + pt->synth_opts.vm_time_correlation); if (!start) return -EINVAL; + /* + * In the case of vm_time_correlation, the overlap might contain TSC + * packets that will not be fixed, and that will then no longer work for + * overlap detection. Avoid that by zeroing out the overlap. 
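+ * Zeroed bytes decode as PAD packets, so the next overlap check will + * not find stale TSC packets in the zeroed region.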
+ */ + if (pt->synth_opts.vm_time_correlation) + memset(b->data, 0, start - b->data); b->use_size = b->data + b->size - start; b->use_data = start; if (b->use_size && consecutive) @@ -901,7 +972,7 @@ static bool intel_pt_timeless_decoding(struct intel_pt *pt) bool timeless_decoding = true; u64 config; - if (!pt->tsc_bit || !pt->cap_user_time_zero) + if (!pt->tsc_bit || !pt->cap_user_time_zero || pt->synth_opts.timeless_decoding) return true; evlist__for_each_entry(pt->session->evlist, evsel) { @@ -949,6 +1020,19 @@ static bool intel_pt_have_tsc(struct intel_pt *pt) return have_tsc; } +static bool intel_pt_have_mtc(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config) && + (config & pt->mtc_bit)) + return true; + } + return false; +} + static bool intel_pt_sampling_mode(struct intel_pt *pt) { struct evsel *evsel; @@ -1103,6 +1187,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.get_trace = intel_pt_get_trace; params.walk_insn = intel_pt_walk_next_insn; params.lookahead = intel_pt_lookahead; + params.findnew_vmcs_info = intel_pt_findnew_vmcs_info; params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); @@ -1112,6 +1197,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; params.quick = pt->synth_opts.quick; + params.vm_time_correlation = pt->synth_opts.vm_time_correlation; + params.vm_tm_corr_dry_run = pt->synth_opts.vm_tm_corr_dry_run; + params.first_timestamp = pt->first_timestamp; if (pt->filts.cnt > 0) params.pgd_ip = intel_pt_pgd_ip; @@ -1176,6 +1264,21 @@ static void intel_pt_free_queue(void *priv) free(ptq); } +static void intel_pt_first_timestamp(struct intel_pt *pt, u64 timestamp) +{ + unsigned int i; + + pt->first_timestamp = timestamp; + + for (i = 0; i < pt->queues.nr_queues; i++) { + struct auxtrace_queue *queue = &pt->queues.queue_array[i]; + struct intel_pt_queue *ptq = queue->priv; + + if (ptq && ptq->decoder) + intel_pt_set_first_timestamp(ptq->decoder, timestamp); + } +} + static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, struct auxtrace_queue *queue) { @@ -2379,7 +2482,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) if (pt->per_cpu_mmaps && (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) && !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && - !pt->sampling_mode) { + !pt->sampling_mode && !pt->synth_opts.vm_time_correlation) { pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip); if (pt->switch_ip) { intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", @@ -2878,6 +2981,8 @@ static int intel_pt_process_event(struct perf_session *session, sample->time); } } else if (timestamp) { + if (!pt->first_timestamp) + intel_pt_first_timestamp(pt, timestamp); err = intel_pt_process_queues(pt, timestamp); } if (err) @@ -2964,6 +3069,7 @@ static void intel_pt_free(struct perf_session *session) auxtrace_heap__free(&pt->heap); intel_pt_free_events(session); session->auxtrace = NULL; + intel_pt_free_vmcs_info(pt); thread__put(pt->unknown_thread); addr_filters__exit(&pt->filts); zfree(&pt->chain); @@ -3407,6 +3513,65 @@ static int intel_pt_setup_time_ranges(struct intel_pt *pt, return 0; } +static int intel_pt_parse_vm_tm_corr_arg(struct intel_pt *pt, char **args) +{ + struct 
intel_pt_vmcs_info *vmcs_info; + u64 tsc_offset, vmcs; + char *p = *args; + + errno = 0; + + p = skip_spaces(p); + if (!*p) + return 1; + + tsc_offset = strtoull(p, &p, 0); + if (errno) + return -errno; + p = skip_spaces(p); + if (*p != ':') { + pt->dflt_tsc_offset = tsc_offset; + *args = p; + return 0; + } + /* Skip the ':' and parse the comma-separated list of VMCS values */ + p += 1; + while (1) { + vmcs = strtoull(p, &p, 0); + if (errno) + return -errno; + if (!vmcs) + return -EINVAL; + vmcs_info = intel_pt_findnew_vmcs(&pt->vmcs_info, vmcs, tsc_offset); + if (!vmcs_info) + return -ENOMEM; + p = skip_spaces(p); + if (*p != ',') + break; + p += 1; + } + *args = p; + return 0; +} + +static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) +{ + char *args = pt->synth_opts.vm_tm_corr_args; + int ret; + + if (!args) + return 0; + + do { + ret = intel_pt_parse_vm_tm_corr_arg(pt, &args); + } while (!ret); + + if (ret < 0) { + pr_err("Failed to parse VM Time Correlation options\n"); + return ret; + } + + return 0; +} + static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", @@ -3469,6 +3634,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (!pt) return -ENOMEM; + pt->vmcs_info = RB_ROOT; + addr_filters__init(&pt->filts); err = perf_config(intel_pt_perf_config, pt); @@ -3481,6 +3648,20 @@ int intel_pt_process_auxtrace_info(union perf_event *event, intel_pt_log_set_name(INTEL_PT_PMU_NAME); + if (session->itrace_synth_opts->set) { + pt->synth_opts = *session->itrace_synth_opts; + } else { + struct itrace_synth_opts *opts = session->itrace_synth_opts; + + itrace_synth_opts__set_default(&pt->synth_opts, opts->default_no_sample); + if (!opts->default_no_sample && !opts->inject) { + pt->synth_opts.branches = false; + pt->synth_opts.callchain = true; + pt->synth_opts.add_callchain = true; + } + pt->synth_opts.thread_stack = opts->thread_stack; + } + pt->session = session; pt->machine = &session->machines.host; /* No kvm support */ pt->auxtrace_type = auxtrace_info->type; @@ -3562,6 +3743,28 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->sampling_mode = intel_pt_sampling_mode(pt); pt->est_tsc = !pt->timeless_decoding; + if (pt->synth_opts.vm_time_correlation) { + if (pt->timeless_decoding) { + pr_err("Intel PT has no time information for VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + if (session->itrace_synth_opts->ptime_range) { + pr_err("Time ranges cannot be specified with VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + /* Currently TSC Offset is calculated using MTC packets */ + if (!intel_pt_have_mtc(pt)) { + pr_err("MTC packets must have been enabled for VM Time Correlation\n"); + err = -EINVAL; + goto err_free_queues; + } + err = intel_pt_parse_vm_tm_corr_args(pt); + if (err) + goto err_free_queues; + } + pt->unknown_thread = thread__new(999999999, 999999999); if (!pt->unknown_thread) { err = -ENOMEM; @@ -3611,21 +3814,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event, goto err_delete_thread; } - if (session->itrace_synth_opts->set) { - pt->synth_opts = *session->itrace_synth_opts; - } else { - itrace_synth_opts__set_default(&pt->synth_opts, - session->itrace_synth_opts->default_no_sample); - if (!session->itrace_synth_opts->default_no_sample && - !session->itrace_synth_opts->inject) { - pt->synth_opts.branches = false; - pt->synth_opts.callchain = true; - pt->synth_opts.add_callchain = true; - } - pt->synth_opts.thread_stack = - session->itrace_synth_opts->thread_stack; - } - 
if (pt->synth_opts.log) intel_pt_log_enable(); diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h index d0fa7bc50a76..2b186c26a43e 100644 --- a/tools/perf/util/pmu-hybrid.h +++ b/tools/perf/util/pmu-hybrid.h @@ -19,4 +19,15 @@ struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name); bool perf_pmu__is_hybrid(const char *name); char *perf_pmu__hybrid_type_to_pmu(const char *type); +static inline int perf_pmu__hybrid_pmu_num(void) +{ + struct perf_pmu *pmu; + int num = 0; + + perf_pmu__for_each_hybrid_pmu(pmu) + num++; + + return num; +} + #endif /* __PMU_HYBRID_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 106b3d60881a..0dbb4f2628f3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2155,6 +2155,7 @@ struct reader { u64 data_size; u64 data_offset; reader_cb_t process; + bool in_place_update; }; static int @@ -2188,7 +2189,9 @@ reader__process_events(struct reader *rd, struct perf_session *session, mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; - if (session->header.needs_swap) { + if (rd->in_place_update) { + mmap_prot |= PROT_WRITE; + } else if (session->header.needs_swap) { mmap_prot |= PROT_WRITE; mmap_flags = MAP_PRIVATE; } @@ -2274,6 +2277,7 @@ static int __perf_session__process_events(struct perf_session *session) .data_size = session->header.data_size, .data_offset = session->header.data_offset, .process = process_simple, + .in_place_update = session->data->in_place_update, }; struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a76fff5e7d83..b759dfd633b4 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -827,11 +827,11 @@ static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, bool first __maybe_unused) { struct caggr_data *cd = data; - struct perf_stat_evsel *ps = counter->stats; + struct perf_counts_values *aggr = &counter->counts->aggr; - cd->avg += avg_stats(&ps->res_stats[0]); - cd->avg_enabled += avg_stats(&ps->res_stats[1]); - cd->avg_running += avg_stats(&ps->res_stats[2]); + cd->avg += aggr->val; + cd->avg_enabled += aggr->ena; + cd->avg_running += aggr->run; } /* diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 2db46b9bebd0..d3ec2624e036 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -437,18 +437,6 @@ int perf_stat_process_counter(struct perf_stat_config *config, aggr->val = aggr->ena = aggr->run = 0; - /* - * We calculate counter's data every interval, - * and the display code shows ps->res_stats - * avg value. We need to zero the stats for - * interval mode, otherwise overall avg running - * averages will be shown for each interval. - */ - if (config->interval || config->summary) { - for (i = 0; i < 3; i++) - init_stats(&ps->res_stats[i]); - } - if (counter->per_pkg) evsel__zero_per_pkg(counter); |