diff options
60 files changed, 1587 insertions, 704 deletions
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 6284779d64ee..b8df45883cf7 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -631,7 +631,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) info->address &= ~alignment_mask; info->ctrl.len <<= offset; - if (!bp->overflow_handler) { + if (is_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -754,7 +754,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!wp->overflow_handler) + if (is_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); unlock: diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index b45c95d34b83..4ef5373f9a76 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -616,7 +616,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (!bp->overflow_handler) + if (is_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -712,7 +712,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, perf_bp_event(wp, regs); /* Do we need to handle the stepping? */ - if (!wp->overflow_handler) + if (is_default_overflow_handler(wp)) step = 1; unlock: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2dc18605831f..a494fa34713a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -164,10 +164,6 @@ config INSTRUCTION_DECODER def_bool y depends on KPROBES || PERF_EVENTS || UPROBES -config PERF_EVENTS_INTEL_UNCORE - def_bool y - depends on PERF_EVENTS && CPU_SUP_INTEL && PCI - config OUTPUT_FORMAT string default "elf32-i386" if X86_32 @@ -1046,6 +1042,8 @@ config X86_THERMAL_VECTOR def_bool y depends on X86_MCE_INTEL +source "arch/x86/events/Kconfig" + config X86_LEGACY_VM86 bool "Legacy VM86 support" default n @@ -1210,15 +1208,6 @@ config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE -config PERF_EVENTS_AMD_POWER - depends on PERF_EVENTS && CPU_SUP_AMD - tristate "AMD Processor Power Reporting Mechanism" - ---help--- - Provide power reporting mechanism support for AMD processors. - Currently, it leverages X86_FEATURE_ACC_POWER - (CPUID Fn8000_0007_EDX[12]) interface to calculate the - average power consumption on Family 15h processors. - config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" ---help--- diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig new file mode 100644 index 000000000000..98397db5ceae --- /dev/null +++ b/arch/x86/events/Kconfig @@ -0,0 +1,36 @@ +menu "Performance monitoring" + +config PERF_EVENTS_INTEL_UNCORE + tristate "Intel uncore performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + ---help--- + Include support for Intel uncore performance events. These are + available on NehalemEX and more modern processors. + +config PERF_EVENTS_INTEL_RAPL + tristate "Intel rapl performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + ---help--- + Include support for Intel rapl performance events for power + monitoring on modern processors. + +config PERF_EVENTS_INTEL_CSTATE + tristate "Intel cstate performance events" + depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + default y + ---help--- + Include support for Intel cstate performance events for power + monitoring on modern processors. + +config PERF_EVENTS_AMD_POWER + depends on PERF_EVENTS && CPU_SUP_AMD + tristate "AMD Processor Power Reporting Mechanism" + ---help--- + Provide power reporting mechanism support for AMD processors. + Currently, it leverages X86_FEATURE_ACC_POWER + (CPUID Fn8000_0007_EDX[12]) interface to calculate the + average power consumption on Family 15h processors. + +endmenu diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index f59618a39990..1d392c39fe56 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -6,9 +6,6 @@ obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o ifdef CONFIG_AMD_IOMMU obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o endif -obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o + +obj-$(CONFIG_CPU_SUP_INTEL) += msr.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile new file mode 100644 index 000000000000..3660b2cf245a --- /dev/null +++ b/arch/x86/events/intel/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o cqm.o +obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o +obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl.o +intel-rapl-objs := rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o +intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o +obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o +intel-cstate-objs := cstate.o diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index b99dc9258c0f..0a6e393a2e62 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -171,18 +171,6 @@ static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head) memset(page_address(phys->page) + index, 0, phys->size - index); } -static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts) -{ - if (buf->snapshot) - return false; - - if (local_read(&buf->data_size) >= bts->handle.size || - bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE) - return true; - - return false; -} - static void bts_update(struct bts_ctx *bts) { int cpu = raw_smp_processor_id(); @@ -213,18 +201,15 @@ static void bts_update(struct bts_ctx *bts) } } +static int +bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle); + static void __bts_event_start(struct perf_event *event) { struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct bts_buffer *buf = perf_get_aux(&bts->handle); u64 config = 0; - if (!buf || bts_buffer_is_full(buf, bts)) - return; - - event->hw.itrace_started = 1; - event->hw.state = 0; - if (!buf->snapshot) config |= ARCH_PERFMON_EVENTSEL_INT; if (!event->attr.exclude_kernel) @@ -241,16 +226,41 @@ static void __bts_event_start(struct perf_event *event) wmb(); intel_pmu_enable_bts(config); + } static void bts_event_start(struct perf_event *event, int flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf; + + buf = perf_aux_output_begin(&bts->handle, event); + if (!buf) + goto fail_stop; + + if (bts_buffer_reset(buf, &bts->handle)) + goto fail_end_stop; + + bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; + bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; + bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; + + event->hw.itrace_started = 1; + event->hw.state = 0; __bts_event_start(event); /* PMI handler: this counter is running and likely generating PMIs */ ACCESS_ONCE(bts->started) = 1; + + return; + +fail_end_stop: + perf_aux_output_end(&bts->handle, 0, false); + +fail_stop: + event->hw.state = PERF_HES_STOPPED; } static void __bts_event_stop(struct perf_event *event) @@ -269,15 +279,32 @@ static void __bts_event_stop(struct perf_event *event) static void bts_event_stop(struct perf_event *event, int flags) { + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); + struct bts_buffer *buf = perf_get_aux(&bts->handle); /* PMI handler: don't restart this counter */ ACCESS_ONCE(bts->started) = 0; __bts_event_stop(event); - if (flags & PERF_EF_UPDATE) + if (flags & PERF_EF_UPDATE) { bts_update(bts); + + if (buf) { + if (buf->snapshot) + bts->handle.head = + local_xchg(&buf->data_size, + buf->nr_pages << PAGE_SHIFT); + perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), + !!local_xchg(&buf->lost, 0)); + } + + cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base; + cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum; + cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold; + } } void intel_bts_enable_local(void) @@ -417,34 +444,14 @@ int intel_bts_interrupt(void) static void bts_event_del(struct perf_event *event, int mode) { - struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); - struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); - struct bts_buffer *buf = perf_get_aux(&bts->handle); - bts_event_stop(event, PERF_EF_UPDATE); - - if (buf) { - if (buf->snapshot) - bts->handle.head = - local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); - perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0), - !!local_xchg(&buf->lost, 0)); - } - - cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; - cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base; - cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum; - cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold; } static int bts_event_add(struct perf_event *event, int mode) { - struct bts_buffer *buf; struct bts_ctx *bts = this_cpu_ptr(&bts_ctx); struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int ret = -EBUSY; event->hw.state = PERF_HES_STOPPED; @@ -454,26 +461,10 @@ static int bts_event_add(struct perf_event *event, int mode) if (bts->handle.event) return -EBUSY; - buf = perf_aux_output_begin(&bts->handle, event); - if (!buf) - return -EINVAL; - - ret = bts_buffer_reset(buf, &bts->handle); - if (ret) { - perf_aux_output_end(&bts->handle, 0, false); - return ret; - } - - bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base; - bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum; - bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold; - if (mode & PERF_EF_START) { bts_event_start(event, 0); - if (hwc->state & PERF_HES_STOPPED) { - bts_event_del(event, 0); - return -EBUSY; - } + if (hwc->state & PERF_HES_STOPPED) + return -EINVAL; } return 0; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 7946c4231169..9ba4e4136a15 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -91,6 +91,8 @@ #include <asm/cpu_device_id.h> #include "../perf_event.h" +MODULE_LICENSE("GPL"); + #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ struct kobj_attribute *attr, \ @@ -106,22 +108,27 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf); +/* Model -> events mapping */ +struct cstate_model { + unsigned long core_events; + unsigned long pkg_events; + unsigned long quirks; +}; + +/* Quirk flags */ +#define SLM_PKG_C6_USE_C7_MSR (1UL << 0) + struct perf_cstate_msr { u64 msr; struct perf_pmu_events_attr *attr; - bool (*test)(int idx); }; /* cstate_core PMU */ - static struct pmu cstate_core_pmu; static bool has_cstate_core; -enum perf_cstate_core_id { - /* - * cstate_core events - */ +enum perf_cstate_core_events { PERF_CSTATE_CORE_C1_RES = 0, PERF_CSTATE_CORE_C3_RES, PERF_CSTATE_CORE_C6_RES, @@ -130,69 +137,16 @@ enum perf_cstate_core_id { PERF_CSTATE_CORE_EVENT_MAX, }; -bool test_core(int idx) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 != 6) - return false; - - switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - if (idx == PERF_CSTATE_CORE_C3_RES || - idx == PERF_CSTATE_CORE_C6_RES) - return true; - break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - if (idx == PERF_CSTATE_CORE_C3_RES || - idx == PERF_CSTATE_CORE_C6_RES || - idx == PERF_CSTATE_CORE_C7_RES) - return true; - break; - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ - if (idx == PERF_CSTATE_CORE_C1_RES || - idx == PERF_CSTATE_CORE_C6_RES) - return true; - break; - } - - return false; -} - PMU_EVENT_ATTR_STRING(c1-residency, evattr_cstate_core_c1, "event=0x00"); PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_core_c3, "event=0x01"); PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_core_c6, "event=0x02"); PMU_EVENT_ATTR_STRING(c7-residency, evattr_cstate_core_c7, "event=0x03"); static struct perf_cstate_msr core_msr[] = { - [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1, test_core, }, - [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3, test_core, }, - [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6, test_core, }, - [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7, test_core, }, + [PERF_CSTATE_CORE_C1_RES] = { MSR_CORE_C1_RES, &evattr_cstate_core_c1 }, + [PERF_CSTATE_CORE_C3_RES] = { MSR_CORE_C3_RESIDENCY, &evattr_cstate_core_c3 }, + [PERF_CSTATE_CORE_C6_RES] = { MSR_CORE_C6_RESIDENCY, &evattr_cstate_core_c6 }, + [PERF_CSTATE_CORE_C7_RES] = { MSR_CORE_C7_RESIDENCY, &evattr_cstate_core_c7 }, }; static struct attribute *core_events_attrs[PERF_CSTATE_CORE_EVENT_MAX + 1] = { @@ -234,18 +188,11 @@ static const struct attribute_group *core_attr_groups[] = { NULL, }; -/* cstate_core PMU end */ - - /* cstate_pkg PMU */ - static struct pmu cstate_pkg_pmu; static bool has_cstate_pkg; -enum perf_cstate_pkg_id { - /* - * cstate_pkg events - */ +enum perf_cstate_pkg_events { PERF_CSTATE_PKG_C2_RES = 0, PERF_CSTATE_PKG_C3_RES, PERF_CSTATE_PKG_C6_RES, @@ -257,69 +204,6 @@ enum perf_cstate_pkg_id { PERF_CSTATE_PKG_EVENT_MAX, }; -bool test_pkg(int idx) -{ - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || - boot_cpu_data.x86 != 6) - return false; - - switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - if (idx == PERF_CSTATE_CORE_C3_RES || - idx == PERF_CSTATE_CORE_C6_RES || - idx == PERF_CSTATE_CORE_C7_RES) - return true; - break; - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - if (idx == PERF_CSTATE_PKG_C2_RES || - idx == PERF_CSTATE_PKG_C3_RES || - idx == PERF_CSTATE_PKG_C6_RES || - idx == PERF_CSTATE_PKG_C7_RES) - return true; - break; - case 55: /* 22nm Atom "Silvermont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - case 76: /* 14nm Atom "Airmont" */ - if (idx == PERF_CSTATE_CORE_C6_RES) - return true; - break; - case 69: /* 22nm Haswell ULT */ - if (idx == PERF_CSTATE_PKG_C2_RES || - idx == PERF_CSTATE_PKG_C3_RES || - idx == PERF_CSTATE_PKG_C6_RES || - idx == PERF_CSTATE_PKG_C7_RES || - idx == PERF_CSTATE_PKG_C8_RES || - idx == PERF_CSTATE_PKG_C9_RES || - idx == PERF_CSTATE_PKG_C10_RES) - return true; - break; - } - - return false; -} - PMU_EVENT_ATTR_STRING(c2-residency, evattr_cstate_pkg_c2, "event=0x00"); PMU_EVENT_ATTR_STRING(c3-residency, evattr_cstate_pkg_c3, "event=0x01"); PMU_EVENT_ATTR_STRING(c6-residency, evattr_cstate_pkg_c6, "event=0x02"); @@ -329,13 +213,13 @@ PMU_EVENT_ATTR_STRING(c9-residency, evattr_cstate_pkg_c9, "event=0x05"); PMU_EVENT_ATTR_STRING(c10-residency, evattr_cstate_pkg_c10, "event=0x06"); static struct perf_cstate_msr pkg_msr[] = { - [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2, test_pkg, }, - [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3, test_pkg, }, - [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6, test_pkg, }, - [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7, test_pkg, }, - [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8, test_pkg, }, - [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9, test_pkg, }, - [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10, test_pkg, }, + [PERF_CSTATE_PKG_C2_RES] = { MSR_PKG_C2_RESIDENCY, &evattr_cstate_pkg_c2 }, + [PERF_CSTATE_PKG_C3_RES] = { MSR_PKG_C3_RESIDENCY, &evattr_cstate_pkg_c3 }, + [PERF_CSTATE_PKG_C6_RES] = { MSR_PKG_C6_RESIDENCY, &evattr_cstate_pkg_c6 }, + [PERF_CSTATE_PKG_C7_RES] = { MSR_PKG_C7_RESIDENCY, &evattr_cstate_pkg_c7 }, + [PERF_CSTATE_PKG_C8_RES] = { MSR_PKG_C8_RESIDENCY, &evattr_cstate_pkg_c8 }, + [PERF_CSTATE_PKG_C9_RES] = { MSR_PKG_C9_RESIDENCY, &evattr_cstate_pkg_c9 }, + [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &evattr_cstate_pkg_c10 }, }; static struct attribute *pkg_events_attrs[PERF_CSTATE_PKG_EVENT_MAX + 1] = { @@ -366,8 +250,6 @@ static const struct attribute_group *pkg_attr_groups[] = { NULL, }; -/* cstate_pkg PMU end*/ - static ssize_t cstate_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) @@ -385,7 +267,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev, static int cstate_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config; - int ret = 0; + int cpu; if (event->attr.type != event->pmu->type) return -ENOENT; @@ -400,26 +282,36 @@ static int cstate_pmu_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (event->cpu < 0) + return -EINVAL; + if (event->pmu == &cstate_core_pmu) { if (cfg >= PERF_CSTATE_CORE_EVENT_MAX) return -EINVAL; if (!core_msr[cfg].attr) return -EINVAL; event->hw.event_base = core_msr[cfg].msr; + cpu = cpumask_any_and(&cstate_core_cpu_mask, + topology_sibling_cpumask(event->cpu)); } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; - } else + cpu = cpumask_any_and(&cstate_pkg_cpu_mask, + topology_core_cpumask(event->cpu)); + } else { return -ENOENT; + } + + if (cpu >= nr_cpu_ids) + return -ENODEV; - /* must be done before validate_group */ + event->cpu = cpu; event->hw.config = cfg; event->hw.idx = -1; - - return ret; + return 0; } static inline u64 cstate_pmu_read_counter(struct perf_event *event) @@ -469,172 +361,91 @@ static int cstate_pmu_event_add(struct perf_event *event, int mode) return 0; } +/* + * Check if exiting cpu is the designated reader. If so migrate the + * events when there is a valid target available + */ static void cstate_cpu_exit(int cpu) { - int i, id, target; + unsigned int target; - /* cpu exit for cstate core */ - if (has_cstate_core) { - id = topology_core_id(cpu); - target = -1; - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (id == topology_core_id(i)) { - target = i; - break; - } - } - if (cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask) && target >= 0) + if (has_cstate_core && + cpumask_test_and_clear_cpu(cpu, &cstate_core_cpu_mask)) { + + target = cpumask_any_but(topology_sibling_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_core_cpu_mask); - WARN_ON(cpumask_empty(&cstate_core_cpu_mask)); - if (target >= 0) perf_pmu_migrate_context(&cstate_core_pmu, cpu, target); + } } - /* cpu exit for cstate pkg */ - if (has_cstate_pkg) { - id = topology_physical_package_id(cpu); - target = -1; - - for_each_online_cpu(i) { - if (i == cpu) - continue; - if (id == topology_physical_package_id(i)) { - target = i; - break; - } - } - if (cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask) && target >= 0) + if (has_cstate_pkg && + cpumask_test_and_clear_cpu(cpu, &cstate_pkg_cpu_mask)) { + + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); + /* Migrate events if there is a valid target */ + if (target < nr_cpu_ids) { cpumask_set_cpu(target, &cstate_pkg_cpu_mask); - WARN_ON(cpumask_empty(&cstate_pkg_cpu_mask)); - if (target >= 0) perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); + } } } static void cstate_cpu_init(int cpu) { - int i, id; + unsigned int target; - /* cpu init for cstate core */ - if (has_cstate_core) { - id = topology_core_id(cpu); - for_each_cpu(i, &cstate_core_cpu_mask) { - if (id == topology_core_id(i)) - break; - } - if (i >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_core_cpu_mask); - } + /* + * If this is the first online thread of that core, set it in + * the core cpu mask as the designated reader. + */ + target = cpumask_any_and(&cstate_core_cpu_mask, + topology_sibling_cpumask(cpu)); - /* cpu init for cstate pkg */ - if (has_cstate_pkg) { - id = topology_physical_package_id(cpu); - for_each_cpu(i, &cstate_pkg_cpu_mask) { - if (id == topology_physical_package_id(i)) - break; - } - if (i >= nr_cpu_ids) - cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); - } + if (has_cstate_core && target >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_core_cpu_mask); + + /* + * If this is the first online thread of that package, set it + * in the package cpu mask as the designated reader. + */ + target = cpumask_any_and(&cstate_pkg_cpu_mask, + topology_core_cpumask(cpu)); + if (has_cstate_pkg && target >= nr_cpu_ids) + cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); } static int cstate_cpu_notifier(struct notifier_block *self, - unsigned long action, void *hcpu) + unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - break; case CPU_STARTING: cstate_cpu_init(cpu); break; - case CPU_UP_CANCELED: - case CPU_DYING: - break; - case CPU_ONLINE: - case CPU_DEAD: - break; case CPU_DOWN_PREPARE: cstate_cpu_exit(cpu); break; default: break; } - return NOTIFY_OK; } -/* - * Probe the cstate events and insert the available one into sysfs attrs - * Return false if there is no available events. - */ -static bool cstate_probe_msr(struct perf_cstate_msr *msr, - struct attribute **events_attrs, - int max_event_nr) -{ - int i, j = 0; - u64 val; - - /* Probe the cstate events. */ - for (i = 0; i < max_event_nr; i++) { - if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) - msr[i].attr = NULL; - } - - /* List remaining events in the sysfs attrs. */ - for (i = 0; i < max_event_nr; i++) { - if (msr[i].attr) - events_attrs[j++] = &msr[i].attr->attr.attr; - } - events_attrs[j] = NULL; - - return (j > 0) ? true : false; -} - -static int __init cstate_init(void) -{ - /* SLM has different MSR for PKG C6 */ - switch (boot_cpu_data.x86_model) { - case 55: - case 76: - case 77: - pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; - } - - if (cstate_probe_msr(core_msr, core_events_attrs, PERF_CSTATE_CORE_EVENT_MAX)) - has_cstate_core = true; - - if (cstate_probe_msr(pkg_msr, pkg_events_attrs, PERF_CSTATE_PKG_EVENT_MAX)) - has_cstate_pkg = true; - - return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; -} - -static void __init cstate_cpumask_init(void) -{ - int cpu; - - cpu_notifier_register_begin(); - - for_each_online_cpu(cpu) - cstate_cpu_init(cpu); - - __perf_cpu_notifier(cstate_cpu_notifier); - - cpu_notifier_register_done(); -} +static struct notifier_block cstate_cpu_nb = { + .notifier_call = cstate_cpu_notifier, + .priority = CPU_PRI_PERF + 1, +}; static struct pmu cstate_core_pmu = { .attr_groups = core_attr_groups, .name = "cstate_core", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, - .add = cstate_pmu_event_add, /* must have */ - .del = cstate_pmu_event_del, /* must have */ + .add = cstate_pmu_event_add, + .del = cstate_pmu_event_del, .start = cstate_pmu_event_start, .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, @@ -646,49 +457,203 @@ static struct pmu cstate_pkg_pmu = { .name = "cstate_pkg", .task_ctx_nr = perf_invalid_context, .event_init = cstate_pmu_event_init, - .add = cstate_pmu_event_add, /* must have */ - .del = cstate_pmu_event_del, /* must have */ + .add = cstate_pmu_event_add, + .del = cstate_pmu_event_del, .start = cstate_pmu_event_start, .stop = cstate_pmu_event_stop, .read = cstate_pmu_event_update, .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; -static void __init cstate_pmus_register(void) +static const struct cstate_model nhm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES), +}; + +static const struct cstate_model snb_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES), +}; + +static const struct cstate_model hswult_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C3_RES) | + BIT(PERF_CSTATE_CORE_C6_RES) | + BIT(PERF_CSTATE_CORE_C7_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES) | + BIT(PERF_CSTATE_PKG_C7_RES) | + BIT(PERF_CSTATE_PKG_C8_RES) | + BIT(PERF_CSTATE_PKG_C9_RES) | + BIT(PERF_CSTATE_PKG_C10_RES), +}; + +static const struct cstate_model slm_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | + BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = SLM_PKG_C6_USE_C7_MSR, +}; + +#define X86_CSTATES_MODEL(model, states) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } + +static const struct x86_cpu_id intel_cstates_match[] __initconst = { + X86_CSTATES_MODEL(30, nhm_cstates), /* 45nm Nehalem */ + X86_CSTATES_MODEL(26, nhm_cstates), /* 45nm Nehalem-EP */ + X86_CSTATES_MODEL(46, nhm_cstates), /* 45nm Nehalem-EX */ + + X86_CSTATES_MODEL(37, nhm_cstates), /* 32nm Westmere */ + X86_CSTATES_MODEL(44, nhm_cstates), /* 32nm Westmere-EP */ + X86_CSTATES_MODEL(47, nhm_cstates), /* 32nm Westmere-EX */ + + X86_CSTATES_MODEL(42, snb_cstates), /* 32nm SandyBridge */ + X86_CSTATES_MODEL(45, snb_cstates), /* 32nm SandyBridge-E/EN/EP */ + + X86_CSTATES_MODEL(58, snb_cstates), /* 22nm IvyBridge */ + X86_CSTATES_MODEL(62, snb_cstates), /* 22nm IvyBridge-EP/EX */ + + X86_CSTATES_MODEL(60, snb_cstates), /* 22nm Haswell Core */ + X86_CSTATES_MODEL(63, snb_cstates), /* 22nm Haswell Server */ + X86_CSTATES_MODEL(70, snb_cstates), /* 22nm Haswell + GT3e */ + + X86_CSTATES_MODEL(69, hswult_cstates), /* 22nm Haswell ULT */ + + X86_CSTATES_MODEL(55, slm_cstates), /* 22nm Atom Silvermont */ + X86_CSTATES_MODEL(77, slm_cstates), /* 22nm Atom Avoton/Rangely */ + X86_CSTATES_MODEL(76, slm_cstates), /* 22nm Atom Airmont */ + + X86_CSTATES_MODEL(61, snb_cstates), /* 14nm Broadwell Core-M */ + X86_CSTATES_MODEL(86, snb_cstates), /* 14nm Broadwell Xeon D */ + X86_CSTATES_MODEL(71, snb_cstates), /* 14nm Broadwell + GT3e */ + X86_CSTATES_MODEL(79, snb_cstates), /* 14nm Broadwell Server */ + + X86_CSTATES_MODEL(78, snb_cstates), /* 14nm Skylake Mobile */ + X86_CSTATES_MODEL(94, snb_cstates), /* 14nm Skylake Desktop */ + { }, +}; +MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); + +/* + * Probe the cstate events and insert the available one into sysfs attrs + * Return false if there are no available events. + */ +static bool __init cstate_probe_msr(const unsigned long evmsk, int max, + struct perf_cstate_msr *msr, + struct attribute **attrs) { - int err; + bool found = false; + unsigned int bit; + u64 val; + + for (bit = 0; bit < max; bit++) { + if (test_bit(bit, &evmsk) && !rdmsrl_safe(msr[bit].msr, &val)) { + *attrs++ = &msr[bit].attr->attr.attr; + found = true; + } else { + msr[bit].attr = NULL; + } + } + *attrs = NULL; + + return found; +} + +static int __init cstate_probe(const struct cstate_model *cm) +{ + /* SLM has different MSR for PKG C6 */ + if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) + pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + + has_cstate_core = cstate_probe_msr(cm->core_events, + PERF_CSTATE_CORE_EVENT_MAX, + core_msr, core_events_attrs); + + has_cstate_pkg = cstate_probe_msr(cm->pkg_events, + PERF_CSTATE_PKG_EVENT_MAX, + pkg_msr, pkg_events_attrs); + + return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; +} + +static inline void cstate_cleanup(void) +{ + if (has_cstate_core) + perf_pmu_unregister(&cstate_core_pmu); + + if (has_cstate_pkg) + perf_pmu_unregister(&cstate_pkg_pmu); +} + +static int __init cstate_init(void) +{ + int cpu, err; + + cpu_notifier_register_begin(); + for_each_online_cpu(cpu) + cstate_cpu_init(cpu); if (has_cstate_core) { err = perf_pmu_register(&cstate_core_pmu, cstate_core_pmu.name, -1); - if (WARN_ON(err)) - pr_info("Failed to register PMU %s error %d\n", - cstate_core_pmu.name, err); + if (err) { + has_cstate_core = false; + pr_info("Failed to register cstate core pmu\n"); + goto out; + } } if (has_cstate_pkg) { err = perf_pmu_register(&cstate_pkg_pmu, cstate_pkg_pmu.name, -1); - if (WARN_ON(err)) - pr_info("Failed to register PMU %s error %d\n", - cstate_pkg_pmu.name, err); + if (err) { + has_cstate_pkg = false; + pr_info("Failed to register cstate pkg pmu\n"); + cstate_cleanup(); + goto out; + } } + __register_cpu_notifier(&cstate_cpu_nb); +out: + cpu_notifier_register_done(); + return err; } static int __init cstate_pmu_init(void) { + const struct x86_cpu_id *id; int err; - if (cpu_has_hypervisor) + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + return -ENODEV; + + id = x86_match_cpu(intel_cstates_match); + if (!id) return -ENODEV; - err = cstate_init(); + err = cstate_probe((const struct cstate_model *) id->driver_data); if (err) return err; - cstate_cpumask_init(); - - cstate_pmus_register(); - - return 0; + return cstate_init(); } +module_init(cstate_pmu_init); -device_initcall(cstate_pmu_init); +static void __exit cstate_pmu_exit(void) +{ + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&cstate_cpu_nb); + cstate_cleanup(); + cpu_notifier_register_done(); +} +module_exit(cstate_pmu_exit); diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 6af7cf71d6b2..127f58c17976 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -906,26 +906,6 @@ static void pt_buffer_free_aux(void *data) } /** - * pt_buffer_is_full() - check if the buffer is full - * @buf: PT buffer. - * @pt: Per-cpu pt handle. - * - * If the user hasn't read data from the output region that aux_head - * points to, the buffer is considered full: the user needs to read at - * least this region and update aux_tail to point past it. - */ -static bool pt_buffer_is_full(struct pt_buffer *buf, struct pt *pt) -{ - if (buf->snapshot) - return false; - - if (local_read(&buf->data_size) >= pt->handle.size) - return true; - - return false; -} - -/** * intel_pt_interrupt() - PT PMI handler */ void intel_pt_interrupt(void) @@ -989,20 +969,33 @@ void intel_pt_interrupt(void) static void pt_event_start(struct perf_event *event, int mode) { + struct hw_perf_event *hwc = &event->hw; struct pt *pt = this_cpu_ptr(&pt_ctx); - struct pt_buffer *buf = perf_get_aux(&pt->handle); + struct pt_buffer *buf; - if (!buf || pt_buffer_is_full(buf, pt)) { - event->hw.state = PERF_HES_STOPPED; - return; + buf = perf_aux_output_begin(&pt->handle, event); + if (!buf) + goto fail_stop; + + pt_buffer_reset_offsets(buf, pt->handle.head); + if (!buf->snapshot) { + if (pt_buffer_reset_markers(buf, &pt->handle)) + goto fail_end_stop; } ACCESS_ONCE(pt->handle_nmi) = 1; - event->hw.state = 0; + hwc->state = 0; pt_config_buffer(buf->cur->table, buf->cur_idx, buf->output_off); pt_config(event); + + return; + +fail_end_stop: + perf_aux_output_end(&pt->handle, 0, true); +fail_stop: + hwc->state = PERF_HES_STOPPED; } static void pt_event_stop(struct perf_event *event, int mode) @@ -1035,19 +1028,7 @@ static void pt_event_stop(struct perf_event *event, int mode) pt_handle_status(pt); pt_update_head(pt); - } -} -static void pt_event_del(struct perf_event *event, int mode) -{ - struct pt *pt = this_cpu_ptr(&pt_ctx); - struct pt_buffer *buf; - - pt_event_stop(event, PERF_EF_UPDATE); - - buf = perf_get_aux(&pt->handle); - - if (buf) { if (buf->snapshot) pt->handle.head = local_xchg(&buf->data_size, @@ -1057,9 +1038,13 @@ static void pt_event_del(struct perf_event *event, int mode) } } +static void pt_event_del(struct perf_event *event, int mode) +{ + pt_event_stop(event, PERF_EF_UPDATE); +} + static int pt_event_add(struct perf_event *event, int mode) { - struct pt_buffer *buf; struct pt *pt = this_cpu_ptr(&pt_ctx); struct hw_perf_event *hwc = &event->hw; int ret = -EBUSY; @@ -1067,34 +1052,18 @@ static int pt_event_add(struct perf_event *event, int mode) if (pt->handle.event) goto fail; - buf = perf_aux_output_begin(&pt->handle, event); - ret = -EINVAL; - if (!buf) - goto fail_stop; - - pt_buffer_reset_offsets(buf, pt->handle.head); - if (!buf->snapshot) { - ret = pt_buffer_reset_markers(buf, &pt->handle); - if (ret) - goto fail_end_stop; - } - if (mode & PERF_EF_START) { pt_event_start(event, 0); - ret = -EBUSY; + ret = -EINVAL; if (hwc->state == PERF_HES_STOPPED) - goto fail_end_stop; + goto fail; } else { hwc->state = PERF_HES_STOPPED; } - return 0; - -fail_end_stop: - perf_aux_output_end(&pt->handle, 0, true); -fail_stop: - hwc->state = PERF_HES_STOPPED; + ret = 0; fail: + return ret; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 70c93f9b03ac..e657de1923c2 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -53,6 +53,8 @@ #include <asm/cpu_device_id.h> #include "../perf_event.h" +MODULE_LICENSE("GPL"); + /* * RAPL energy status counters */ @@ -592,6 +594,11 @@ static int rapl_cpu_notifier(struct notifier_block *self, return NOTIFY_OK; } +static struct notifier_block rapl_cpu_nb = { + .notifier_call = rapl_cpu_notifier, + .priority = CPU_PRI_PERF + 1, +}; + static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -660,7 +667,7 @@ static int __init rapl_prepare_cpus(void) return 0; } -static void __init cleanup_rapl_pmus(void) +static void cleanup_rapl_pmus(void) { int i; @@ -691,51 +698,77 @@ static int __init init_rapl_pmus(void) return 0; } +#define X86_RAPL_MODEL_MATCH(model, init) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } + +struct intel_rapl_init_fun { + bool apply_quirk; + int cntr_mask; + struct attribute **attrs; +}; + +static const struct intel_rapl_init_fun snb_rapl_init __initconst = { + .apply_quirk = false, + .cntr_mask = RAPL_IDX_CLN, + .attrs = rapl_events_cln_attr, +}; + +static const struct intel_rapl_init_fun hsx_rapl_init __initconst = { + .apply_quirk = true, + .cntr_mask = RAPL_IDX_SRV, + .attrs = rapl_events_srv_attr, +}; + +static const struct intel_rapl_init_fun hsw_rapl_init __initconst = { + .apply_quirk = false, + .cntr_mask = RAPL_IDX_HSW, + .attrs = rapl_events_hsw_attr, +}; + +static const struct intel_rapl_init_fun snbep_rapl_init __initconst = { + .apply_quirk = false, + .cntr_mask = RAPL_IDX_SRV, + .attrs = rapl_events_srv_attr, +}; + +static const struct intel_rapl_init_fun knl_rapl_init __initconst = { + .apply_quirk = true, + .cntr_mask = RAPL_IDX_KNL, + .attrs = rapl_events_knl_attr, +}; + static const struct x86_cpu_id rapl_cpu_match[] __initconst = { - [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, - [1] = {}, + X86_RAPL_MODEL_MATCH(42, snb_rapl_init), /* Sandy Bridge */ + X86_RAPL_MODEL_MATCH(58, snb_rapl_init), /* Ivy Bridge */ + X86_RAPL_MODEL_MATCH(63, hsx_rapl_init), /* Haswell-Server */ + X86_RAPL_MODEL_MATCH(79, hsx_rapl_init), /* Broadwell-Server */ + X86_RAPL_MODEL_MATCH(60, hsw_rapl_init), /* Haswell */ + X86_RAPL_MODEL_MATCH(69, hsw_rapl_init), /* Haswell-Celeron */ + X86_RAPL_MODEL_MATCH(61, hsw_rapl_init), /* Broadwell */ + X86_RAPL_MODEL_MATCH(71, hsw_rapl_init), /* Broadwell-H */ + X86_RAPL_MODEL_MATCH(45, snbep_rapl_init), /* Sandy Bridge-EP */ + X86_RAPL_MODEL_MATCH(62, snbep_rapl_init), /* IvyTown */ + X86_RAPL_MODEL_MATCH(87, knl_rapl_init), /* Knights Landing */ + {}, }; +MODULE_DEVICE_TABLE(x86cpu, rapl_cpu_match); + static int __init rapl_pmu_init(void) { - bool apply_quirk = false; + const struct x86_cpu_id *id; + struct intel_rapl_init_fun *rapl_init; + bool apply_quirk; int ret; - if (!x86_match_cpu(rapl_cpu_match)) + id = x86_match_cpu(rapl_cpu_match); + if (!id) return -ENODEV; - switch (boot_cpu_data.x86_model) { - case 42: /* Sandy Bridge */ - case 58: /* Ivy Bridge */ - rapl_cntr_mask = RAPL_IDX_CLN; - rapl_pmu_events_group.attrs = rapl_events_cln_attr; - break; - case 63: /* Haswell-Server */ - case 79: /* Broadwell-Server */ - apply_quirk = true; - rapl_cntr_mask = RAPL_IDX_SRV; - rapl_pmu_events_group.attrs = rapl_events_srv_attr; - break; - case 60: /* Haswell */ - case 69: /* Haswell-Celeron */ - case 61: /* Broadwell */ - case 71: /* Broadwell-H */ - rapl_cntr_mask = RAPL_IDX_HSW; - rapl_pmu_events_group.attrs = rapl_events_hsw_attr; - break; - case 45: /* Sandy Bridge-EP */ - case 62: /* IvyTown */ - rapl_cntr_mask = RAPL_IDX_SRV; - rapl_pmu_events_group.attrs = rapl_events_srv_attr; - break; - case 87: /* Knights Landing */ - apply_quirk = true; - rapl_cntr_mask = RAPL_IDX_KNL; - rapl_pmu_events_group.attrs = rapl_events_knl_attr; - break; - default: - return -ENODEV; - } + rapl_init = (struct intel_rapl_init_fun *)id->driver_data; + apply_quirk = rapl_init->apply_quirk; + rapl_cntr_mask = rapl_init->cntr_mask; + rapl_pmu_events_group.attrs = rapl_init->attrs; ret = rapl_check_hw_unit(apply_quirk); if (ret) @@ -755,7 +788,7 @@ static int __init rapl_pmu_init(void) if (ret) goto out; - __perf_cpu_notifier(rapl_cpu_notifier); + __register_cpu_notifier(&rapl_cpu_nb); cpu_notifier_register_done(); rapl_advertise(); return 0; @@ -766,4 +799,14 @@ out: cpu_notifier_register_done(); return ret; } -device_initcall(rapl_pmu_init); +module_init(rapl_pmu_init); + +static void __exit intel_rapl_exit(void) +{ + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&rapl_cpu_nb); + perf_pmu_unregister(&rapl_pmus->pmu); + cleanup_rapl_pmus(); + cpu_notifier_register_done(); +} +module_exit(intel_rapl_exit); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 7012d18bb293..17734a6ef474 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,3 +1,4 @@ +#include <asm/cpu_device_id.h> #include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; @@ -21,6 +22,8 @@ static struct event_constraint uncore_constraint_fixed = struct event_constraint uncore_constraint_empty = EVENT_CONSTRAINT(0, 0, 0); +MODULE_LICENSE("GPL"); + static int uncore_pcibus_to_physid(struct pci_bus *bus) { struct pci2phy_map *map; @@ -754,7 +757,7 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) pmu->registered = false; } -static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) +static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) { struct intel_uncore_pmu *pmu = type->pmus; struct intel_uncore_box *box; @@ -770,7 +773,7 @@ static void __init __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) } } -static void __init uncore_exit_boxes(void *dummy) +static void uncore_exit_boxes(void *dummy) { struct intel_uncore_type **types; @@ -787,7 +790,7 @@ static void uncore_free_boxes(struct intel_uncore_pmu *pmu) kfree(pmu->boxes); } -static void __init uncore_type_exit(struct intel_uncore_type *type) +static void uncore_type_exit(struct intel_uncore_type *type) { struct intel_uncore_pmu *pmu = type->pmus; int i; @@ -804,7 +807,7 @@ static void __init uncore_type_exit(struct intel_uncore_type *type) type->events_group = NULL; } -static void __init uncore_types_exit(struct intel_uncore_type **types) +static void uncore_types_exit(struct intel_uncore_type **types) { for (; *types; types++) uncore_type_exit(*types); @@ -989,46 +992,6 @@ static int __init uncore_pci_init(void) size_t size; int ret; - switch (boot_cpu_data.x86_model) { - case 45: /* Sandy Bridge-EP */ - ret = snbep_uncore_pci_init(); - break; - case 62: /* Ivy Bridge-EP */ - ret = ivbep_uncore_pci_init(); - break; - case 63: /* Haswell-EP */ - ret = hswep_uncore_pci_init(); - break; - case 79: /* BDX-EP */ - case 86: /* BDX-DE */ - ret = bdx_uncore_pci_init(); - break; - case 42: /* Sandy Bridge */ - ret = snb_uncore_pci_init(); - break; - case 58: /* Ivy Bridge */ - ret = ivb_uncore_pci_init(); - break; - case 60: /* Haswell */ - case 69: /* Haswell Celeron */ - ret = hsw_uncore_pci_init(); - break; - case 61: /* Broadwell */ - ret = bdw_uncore_pci_init(); - break; - case 87: /* Knights Landing */ - ret = knl_uncore_pci_init(); - break; - case 94: /* SkyLake */ - ret = skl_uncore_pci_init(); - break; - default: - return -ENODEV; - } - - if (ret) - return ret; - size = max_packages * sizeof(struct pci_extra_dev); uncore_extra_pci_dev = kzalloc(size, GFP_KERNEL); if (!uncore_extra_pci_dev) { @@ -1060,7 +1023,7 @@ err: return ret; } -static void __init uncore_pci_exit(void) +static void uncore_pci_exit(void) { if (pcidrv_registered) { pcidrv_registered = false; @@ -1287,46 +1250,6 @@ static int __init uncore_cpu_init(void) { int ret; - switch (boot_cpu_data.x86_model) { - case 26: /* Nehalem */ - case 30: - case 37: /* Westmere */ - case 44: - nhm_uncore_cpu_init(); - break; - case 42: /* Sandy Bridge */ - case 58: /* Ivy Bridge */ - case 60: /* Haswell */ - case 69: /* Haswell */ - case 70: /* Haswell */ - case 61: /* Broadwell */ - case 71: /* Broadwell */ - snb_uncore_cpu_init(); - break; - case 45: /* Sandy Bridge-EP */ - snbep_uncore_cpu_init(); - break; - case 46: /* Nehalem-EX */ - case 47: /* Westmere-EX aka. Xeon E7 */ - nhmex_uncore_cpu_init(); - break; - case 62: /* Ivy Bridge-EP */ - ivbep_uncore_cpu_init(); - break; - case 63: /* Haswell-EP */ - hswep_uncore_cpu_init(); - break; - case 79: /* BDX-EP */ - case 86: /* BDX-DE */ - bdx_uncore_cpu_init(); - break; - case 87: /* Knights Landing */ - knl_uncore_cpu_init(); - break; - default: - return -ENODEV; - } - ret = uncore_types_init(uncore_msr_uncores, true); if (ret) goto err; @@ -1376,11 +1299,105 @@ static int __init uncore_cpumask_init(bool msr) return 0; } +#define X86_UNCORE_MODEL_MATCH(model, init) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&init } + +struct intel_uncore_init_fun { + void (*cpu_init)(void); + int (*pci_init)(void); +}; + +static const struct intel_uncore_init_fun nhm_uncore_init __initconst = { + .cpu_init = nhm_uncore_cpu_init, +}; + +static const struct intel_uncore_init_fun snb_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = snb_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun ivb_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = ivb_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun hsw_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = hsw_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun bdw_uncore_init __initconst = { + .cpu_init = snb_uncore_cpu_init, + .pci_init = bdw_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun snbep_uncore_init __initconst = { + .cpu_init = snbep_uncore_cpu_init, + .pci_init = snbep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun nhmex_uncore_init __initconst = { + .cpu_init = nhmex_uncore_cpu_init, +}; + +static const struct intel_uncore_init_fun ivbep_uncore_init __initconst = { + .cpu_init = ivbep_uncore_cpu_init, + .pci_init = ivbep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun hswep_uncore_init __initconst = { + .cpu_init = hswep_uncore_cpu_init, + .pci_init = hswep_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun bdx_uncore_init __initconst = { + .cpu_init = bdx_uncore_cpu_init, + .pci_init = bdx_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun knl_uncore_init __initconst = { + .cpu_init = knl_uncore_cpu_init, + .pci_init = knl_uncore_pci_init, +}; + +static const struct intel_uncore_init_fun skl_uncore_init __initconst = { + .pci_init = skl_uncore_pci_init, +}; + +static const struct x86_cpu_id intel_uncore_match[] __initconst = { + X86_UNCORE_MODEL_MATCH(26, nhm_uncore_init), /* Nehalem */ + X86_UNCORE_MODEL_MATCH(30, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(37, nhm_uncore_init), /* Westmere */ + X86_UNCORE_MODEL_MATCH(44, nhm_uncore_init), + X86_UNCORE_MODEL_MATCH(42, snb_uncore_init), /* Sandy Bridge */ + X86_UNCORE_MODEL_MATCH(58, ivb_uncore_init), /* Ivy Bridge */ + X86_UNCORE_MODEL_MATCH(60, hsw_uncore_init), /* Haswell */ + X86_UNCORE_MODEL_MATCH(69, hsw_uncore_init), /* Haswell Celeron */ + X86_UNCORE_MODEL_MATCH(70, hsw_uncore_init), /* Haswell */ + X86_UNCORE_MODEL_MATCH(61, bdw_uncore_init), /* Broadwell */ + X86_UNCORE_MODEL_MATCH(71, bdw_uncore_init), /* Broadwell */ + X86_UNCORE_MODEL_MATCH(45, snbep_uncore_init), /* Sandy Bridge-EP */ + X86_UNCORE_MODEL_MATCH(46, nhmex_uncore_init), /* Nehalem-EX */ + X86_UNCORE_MODEL_MATCH(47, nhmex_uncore_init), /* Westmere-EX aka. Xeon E7 */ + X86_UNCORE_MODEL_MATCH(62, ivbep_uncore_init), /* Ivy Bridge-EP */ + X86_UNCORE_MODEL_MATCH(63, hswep_uncore_init), /* Haswell-EP */ + X86_UNCORE_MODEL_MATCH(79, bdx_uncore_init), /* BDX-EP */ + X86_UNCORE_MODEL_MATCH(86, bdx_uncore_init), /* BDX-DE */ + X86_UNCORE_MODEL_MATCH(87, knl_uncore_init), /* Knights Landing */ + X86_UNCORE_MODEL_MATCH(94, skl_uncore_init), /* SkyLake */ + {}, +}; + +MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); + static int __init intel_uncore_init(void) { - int pret, cret, ret; + const struct x86_cpu_id *id; + struct intel_uncore_init_fun *uncore_init; + int pret = 0, cret = 0, ret; - if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + id = x86_match_cpu(intel_uncore_match); + if (!id) return -ENODEV; if (cpu_has_hypervisor) @@ -1388,8 +1405,17 @@ static int __init intel_uncore_init(void) max_packages = topology_max_packages(); - pret = uncore_pci_init(); - cret = uncore_cpu_init(); + uncore_init = (struct intel_uncore_init_fun *)id->driver_data; + if (uncore_init->pci_init) { + pret = uncore_init->pci_init(); + if (!pret) + pret = uncore_pci_init(); + } + + if (uncore_init->cpu_init) { + uncore_init->cpu_init(); + cret = uncore_cpu_init(); + } if (cret && pret) return -ENODEV; @@ -1409,4 +1435,14 @@ err: cpu_notifier_register_done(); return ret; } -device_initcall(intel_uncore_init); +module_init(intel_uncore_init); + +static void __exit intel_uncore_exit(void) +{ + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&uncore_cpu_nb); + uncore_types_exit(uncore_msr_uncores); + uncore_pci_exit(); + cpu_notifier_register_done(); +} +module_exit(intel_uncore_exit); diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ec863b9a9f78..7111400a1f9a 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -6,6 +6,8 @@ enum perf_msr_id { PERF_MSR_MPERF = 2, PERF_MSR_PPERF = 3, PERF_MSR_SMI = 4, + PERF_MSR_PTSC = 5, + PERF_MSR_IRPERF = 6, PERF_MSR_EVENT_MAX, }; @@ -15,6 +17,16 @@ static bool test_aperfmperf(int idx) return boot_cpu_has(X86_FEATURE_APERFMPERF); } +static bool test_ptsc(int idx) +{ + return boot_cpu_has(X86_FEATURE_PTSC); +} + +static bool test_irperf(int idx) +{ + return boot_cpu_has(X86_FEATURE_IRPERF); +} + static bool test_intel(int idx) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || @@ -69,18 +81,22 @@ struct perf_msr { bool (*test)(int idx); }; -PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00"); -PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01"); -PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02"); -PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); -PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); +PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00"); +PMU_EVENT_ATTR_STRING(aperf, evattr_aperf, "event=0x01"); +PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02"); +PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); +PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); +PMU_EVENT_ATTR_STRING(ptsc, evattr_ptsc, "event=0x05"); +PMU_EVENT_ATTR_STRING(irperf, evattr_irperf, "event=0x06"); static struct perf_msr msr[] = { - [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, - [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, - [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, - [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, - [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, + [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, + [PERF_MSR_PTSC] = { MSR_F15H_PTSC, &evattr_ptsc, test_ptsc, }, + [PERF_MSR_IRPERF] = { MSR_F17H_IRPERF, &evattr_irperf, test_irperf, }, }; static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 8f9afefd2dc5..8cd6a320f658 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -177,6 +177,7 @@ #define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */ #define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ #define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */ #define X86_FEATURE_PERFCTR_L2 ( 6*32+28) /* L2 performance counter extensions */ #define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */ @@ -250,6 +251,7 @@ /* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */ #define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 5b3c9a55f51c..f882cbf9e3da 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -315,6 +315,9 @@ #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + /* Fam 16h MSRs */ #define MSR_F16H_L2I_PERF_CTL 0xc0010230 #define MSR_F16H_L2I_PERF_CTR 0xc0010231 @@ -328,6 +331,7 @@ #define MSR_F15H_PERF_CTR 0xc0010201 #define MSR_F15H_NB_PERF_CTL 0xc0010240 #define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_PTSC 0xc0010280 #define MSR_F15H_IC_CFG 0xc0011021 /* Fam 10h MSRs */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f291275ffd71..b8b195fbe787 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -838,6 +838,12 @@ extern void perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs); +static inline bool +is_default_overflow_handler(struct perf_event *event) +{ + return (event->overflow_handler == perf_event_output); +} + extern void perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1afe9623c1a7..a3c19034d5f8 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -401,6 +401,7 @@ struct perf_event_attr { #define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) #define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) #define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) enum perf_event_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, diff --git a/kernel/events/core.c b/kernel/events/core.c index 52bedc5a5aaa..8c3b35f2a269 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1925,8 +1925,13 @@ event_sched_in(struct perf_event *event, if (event->state <= PERF_EVENT_STATE_OFF) return 0; - event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = smp_processor_id(); + WRITE_ONCE(event->oncpu, smp_processor_id()); + /* + * Order event::oncpu write to happen before the ACTIVE state + * is visible. + */ + smp_wmb(); + WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE); /* * Unthrottle events, since we scheduled we might have missed several @@ -2358,6 +2363,29 @@ void perf_event_enable(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_enable); +static int __perf_event_stop(void *info) +{ + struct perf_event *event = info; + + /* for AUX events, our job is done if the event is already inactive */ + if (READ_ONCE(event->state) != PERF_EVENT_STATE_ACTIVE) + return 0; + + /* matches smp_wmb() in event_sched_in() */ + smp_rmb(); + + /* + * There is a window with interrupts enabled before we get here, + * so we need to check again lest we try to stop another CPU's event. + */ + if (READ_ONCE(event->oncpu) != smp_processor_id()) + return -EAGAIN; + + event->pmu->stop(event, PERF_EF_UPDATE); + + return 0; +} + static int _perf_event_refresh(struct perf_event *event, int refresh) { /* @@ -4351,6 +4379,19 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon case PERF_EVENT_IOC_SET_BPF: return perf_event_set_bpf_prog(event, arg); + case PERF_EVENT_IOC_PAUSE_OUTPUT: { + struct ring_buffer *rb; + + rcu_read_lock(); + rb = rcu_dereference(event->rb); + if (!rb || !rb->nr_pages) { + rcu_read_unlock(); + return -EINVAL; + } + rb_toggle_paused(rb, !!arg); + rcu_read_unlock(); + return 0; + } default: return -ENOTTY; } @@ -4667,6 +4708,8 @@ static void perf_mmap_open(struct vm_area_struct *vma) event->pmu->event_mapped(event); } +static void perf_pmu_output_stop(struct perf_event *event); + /* * A buffer can be mmap()ed multiple times; either directly through the same * event, or through other events by use of perf_event_set_output(). @@ -4694,10 +4737,22 @@ static void perf_mmap_close(struct vm_area_struct *vma) */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + /* + * Stop all AUX events that are writing to this buffer, + * so that we can free its AUX pages and corresponding PMU + * data. Note that after rb::aux_mmap_count dropped to zero, + * they won't start any more (see perf_aux_output_begin()). + */ + perf_pmu_output_stop(event); + + /* now it's safe to free the pages */ atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm); vma->vm_mm->pinned_vm -= rb->aux_mmap_locked; + /* this has to be the last one */ rb_free_aux(rb); + WARN_ON_ONCE(atomic_read(&rb->aux_refcount)); + mutex_unlock(&event->mmap_mutex); } @@ -5768,6 +5823,80 @@ next: rcu_read_unlock(); } +struct remote_output { + struct ring_buffer *rb; + int err; +}; + +static void __perf_event_output_stop(struct perf_event *event, void *data) +{ + struct perf_event *parent = event->parent; + struct remote_output *ro = data; + struct ring_buffer *rb = ro->rb; + + if (!has_aux(event)) + return; + + if (!parent) + parent = event; + + /* + * In case of inheritance, it will be the parent that links to the + * ring-buffer, but it will be the child that's actually using it: + */ + if (rcu_dereference(parent->rb) == rb) + ro->err = __perf_event_stop(event); +} + +static int __perf_pmu_output_stop(void *info) +{ + struct perf_event *event = info; + struct pmu *pmu = event->pmu; + struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + struct remote_output ro = { + .rb = event->rb, + }; + + rcu_read_lock(); + perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro); + if (cpuctx->task_ctx) + perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop, + &ro); + rcu_read_unlock(); + + return ro.err; +} + +static void perf_pmu_output_stop(struct perf_event *event) +{ + struct perf_event *iter; + int err, cpu; + +restart: + rcu_read_lock(); + list_for_each_entry_rcu(iter, &event->rb->event_list, rb_entry) { + /* + * For per-CPU events, we need to make sure that neither they + * nor their children are running; for cpu==-1 events it's + * sufficient to stop the event itself if it's active, since + * it can't have children. + */ + cpu = iter->cpu; + if (cpu == -1) + cpu = READ_ONCE(iter->oncpu); + + if (cpu == -1) + continue; + + err = cpu_function_call(cpu, __perf_pmu_output_stop, event); + if (err == -EAGAIN) { + rcu_read_unlock(); + goto restart; + } + } + rcu_read_unlock(); +} + /* * task tracking -- fork/exit * @@ -6499,10 +6628,7 @@ static int __perf_event_overflow(struct perf_event *event, irq_work_queue(&event->pending); } - if (event->overflow_handler) - event->overflow_handler(event, data, regs); - else - perf_event_output(event, data, regs); + event->overflow_handler(event, data, regs); if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; @@ -7693,6 +7819,15 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type) } skip_type: + if (pmu->task_ctx_nr == perf_hw_context) { + static int hw_context_taken = 0; + + if (WARN_ON_ONCE(hw_context_taken)) + pmu->task_ctx_nr = perf_invalid_context; + + hw_context_taken = 1; + } + pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); if (pmu->pmu_cpu_context) goto got_cpu_context; @@ -8014,8 +8149,13 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, context = parent_event->overflow_handler_context; } - event->overflow_handler = overflow_handler; - event->overflow_handler_context = context; + if (overflow_handler) { + event->overflow_handler = overflow_handler; + event->overflow_handler_context = context; + } else { + event->overflow_handler = perf_event_output; + event->overflow_handler_context = NULL; + } perf_event__state_init(event); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 4199b6d193f5..05f9f6d626df 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,13 +11,13 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; - struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ #endif int nr_pages; /* nr of data pages */ int overwrite; /* can overwrite itself */ + int paused; /* can write into ring buffer */ atomic_t poll; /* POLL_ for wakeups */ @@ -65,6 +65,14 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head) rb_free(rb); } +static inline void rb_toggle_paused(struct ring_buffer *rb, bool pause) +{ + if (!pause && rb->nr_pages) + rb->paused = 0; + else + rb->paused = 1; +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index c61f0cbd308b..60be55a64040 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -102,8 +102,21 @@ out: preempt_enable(); } -int perf_output_begin(struct perf_output_handle *handle, - struct perf_event *event, unsigned int size) +static bool __always_inline +ring_buffer_has_space(unsigned long head, unsigned long tail, + unsigned long data_size, unsigned int size, + bool backward) +{ + if (!backward) + return CIRC_SPACE(head, tail, data_size) >= size; + else + return CIRC_SPACE(tail, head, data_size) >= size; +} + +static int __always_inline +__perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size, + bool backward) { struct ring_buffer *rb; unsigned long tail, offset, head; @@ -125,8 +138,11 @@ int perf_output_begin(struct perf_output_handle *handle, if (unlikely(!rb)) goto out; - if (unlikely(!rb->nr_pages)) + if (unlikely(rb->paused)) { + if (rb->nr_pages) + local_inc(&rb->lost); goto out; + } handle->rb = rb; handle->event = event; @@ -143,9 +159,12 @@ int perf_output_begin(struct perf_output_handle *handle, do { tail = READ_ONCE(rb->user_page->data_tail); offset = head = local_read(&rb->head); - if (!rb->overwrite && - unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) - goto fail; + if (!rb->overwrite) { + if (unlikely(!ring_buffer_has_space(head, tail, + perf_data_size(rb), + size, backward))) + goto fail; + } /* * The above forms a control dependency barrier separating the @@ -159,9 +178,17 @@ int perf_output_begin(struct perf_output_handle *handle, * See perf_output_put_handle(). */ - head += size; + if (!backward) + head += size; + else + head -= size; } while (local_cmpxchg(&rb->head, offset, head) != offset); + if (backward) { + offset = head; + head = (u64)(-head); + } + /* * We rely on the implied barrier() by local_cmpxchg() to ensure * none of the data stores below can be lifted up by the compiler. @@ -203,6 +230,12 @@ out: return -ENOSPC; } +int perf_output_begin(struct perf_output_handle *handle, + struct perf_event *event, unsigned int size) +{ + return __perf_output_begin(handle, event, size, false); +} + unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len) { @@ -221,8 +254,6 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } -static void rb_irq_work(struct irq_work *work); - static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -243,16 +274,13 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); - init_irq_work(&rb->irq_work, rb_irq_work); -} -static void ring_buffer_put_async(struct ring_buffer *rb) -{ - if (!atomic_dec_and_test(&rb->refcount)) - return; - - rb->rcu_head.next = (void *)rb; - irq_work_queue(&rb->irq_work); + /* + * perf_output_begin() only checks rb->paused, therefore + * rb->paused must be true if we have no pages for output. + */ + if (!rb->nr_pages) + rb->paused = 1; } /* @@ -264,6 +292,10 @@ static void ring_buffer_put_async(struct ring_buffer *rb) * The ordering is similar to that of perf_output_{begin,end}, with * the exception of (B), which should be taken care of by the pmu * driver, since ordering rules will differ depending on hardware. + * + * Call this from pmu::start(); see the comment in perf_aux_output_end() + * about its use in pmu callbacks. Both can also be called from the PMI + * handler if needed. */ void *perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) @@ -288,6 +320,13 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, goto err; /* + * If rb::aux_mmap_count is zero (and rb_has_aux() above went through), + * the aux buffer is in perf_mmap_close(), about to get freed. + */ + if (!atomic_read(&rb->aux_mmap_count)) + goto err_put; + + /* * Nesting is not supported for AUX area, make sure nested * writers are caught early */ @@ -328,10 +367,11 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, return handle->rb->aux_priv; err_put: + /* can't be last */ rb_free_aux(rb); err: - ring_buffer_put_async(rb); + ring_buffer_put(rb); handle->event = NULL; return NULL; @@ -342,6 +382,10 @@ err: * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the * pmu driver's responsibility to observe ordering rules of the hardware, * so that all the data is externally visible before this is called. + * + * Note: this has to be called from pmu::stop() callback, as the assumption + * of the AUX buffer management code is that after pmu::stop(), the AUX + * transaction must be stopped and therefore drop the AUX reference count. */ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, bool truncated) @@ -381,8 +425,9 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, handle->event = NULL; local_set(&rb->aux_nest, 0); + /* can't be last */ rb_free_aux(rb); - ring_buffer_put_async(rb); + ring_buffer_put(rb); } /* @@ -463,6 +508,14 @@ static void __rb_free_aux(struct ring_buffer *rb) { int pg; + /* + * Should never happen, the last reference should be dropped from + * perf_mmap_close() path, which first stops aux transactions (which + * in turn are the atomic holders of aux_refcount) and then does the + * last rb_free_aux(). + */ + WARN_ON_ONCE(in_atomic()); + if (rb->aux_priv) { rb->free_aux(rb->aux_priv); rb->free_aux = NULL; @@ -574,18 +627,7 @@ out: void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) - irq_work_queue(&rb->irq_work); -} - -static void rb_irq_work(struct irq_work *work) -{ - struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); - - if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); - - if (rb->rcu_head.next == (void *)rb) - call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 00df25fd86ef..e11108f1d197 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -47,6 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) return -EPERM; + if (!is_sampling_event(p_event)) + return 0; + /* * We don't allow user space callchains for function trace * event, due to issues with page faults while tracing page diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index be764f9ec769..c6c8318e38a2 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -672,6 +672,7 @@ The letters are: d create a debug log g synthesize a call chain (use with i or x) l synthesize last branch entries (use with i or x) + s skip initial number of events "Instructions" events look like they were recorded by "perf record -e instructions". @@ -730,6 +731,12 @@ from one sample to the next. To disable trace decoding entirely, use the option --no-itrace. +It is also possible to skip events generated (instructions, branches, transactions) +at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + +skips the first million instructions. dump option ----------- diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 65453f4c7006..e2a4c5e0dbe5 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -7,6 +7,7 @@ d create a debug log g synthesize a call chain (use with i or x) l synthesize last branch entries (use with i or x) + s skip initial number of events The default is all events i.e. the same as --itrace=ibxe @@ -24,3 +25,10 @@ Also the number of last branch entries (default 64, max. 1024) for instructions or transactions events can be specified. + + It is also possible to skip events generated (instructions, branches, transactions) + at the beginning. This is useful to ignore initialization code. + + --itrace=i0nss1000000 + + skips the first million instructions. diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index e9cd39a92dc2..778f54d4d0bd 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -33,7 +33,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. -v:: --verbose:: diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index d1deb573877f..3e9490b9c533 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -75,7 +75,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. --symfs=<directory>:: Look for files with symbols relative to this directory. diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 43310d8661fe..1d6092c460dd 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -48,6 +48,14 @@ OPTIONS option can be passed in record mode. It will be interpreted the same way as perf record. +-K:: +--all-kernel:: + Configure all used events to run in kernel space. + +-U:: +--all-user:: + Configure all used events to run in user space. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 12113992ac9d..496d42cdf02b 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -285,7 +285,7 @@ OPTIONS -f:: --force:: - Don't complain, do it. + Don't do ownership validation. --symfs=<directory>:: Look for files with symbols relative to this directory. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 382ddfb45d1d..22ef3933342a 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -262,6 +262,10 @@ include::itrace.txt[] --ns:: Use 9 decimal places when displaying time (i.e. show the nanoseconds) +-f:: +--force:: + Don't do ownership validation. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index d66f9ad4df2e..7dc30637cf66 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -438,6 +438,11 @@ struct auxtrace_record *intel_bts_recording_init(int *err) if (!intel_bts_pmu) return NULL; + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + btsr = zalloc(sizeof(struct intel_bts_recording)); if (!btsr) { *err = -ENOMEM; diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index a3395179c9ee..a07b9605e93b 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -1027,6 +1027,11 @@ struct auxtrace_record *intel_pt_recording_init(int *err) if (!intel_pt_pmu) return NULL; + if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) { + *err = -errno; + return NULL; + } + ptr = zalloc(sizeof(struct intel_pt_recording)); if (!ptr) { *err = -ENOMEM; diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index fd2868490d00..357f1b13b5ae 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -7,7 +7,6 @@ #include <linux/types.h> #include "../../util/debug.h" #include "../../util/tsc.h" -#include "tsc.h" int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion *tc) @@ -46,3 +45,34 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct time_conv_event), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + + return process(tool, &event, NULL, machine); +} diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h deleted file mode 100644 index 2edc4d31065c..000000000000 --- a/tools/perf/arch/x86/util/tsc.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ -#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ - -#include <linux/types.h> - -struct perf_tsc_conversion { - u16 time_shift; - u32 time_mult; - u64 time_zero; -}; - -struct perf_event_mmap_page; - -int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, - struct perf_tsc_conversion *tc); - -#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */ diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index d1a2d104f2bc..e5afa8fe1bf1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -748,6 +748,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .auxtrace_info = perf_event__repipe_op2_synth, .auxtrace = perf_event__repipe_auxtrace, .auxtrace_error = perf_event__repipe_op2_synth, + .time_conv = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 85db3be4b3cb..1dc140c5481d 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -62,19 +62,22 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) int rec_argc, i = 0, j; const char **rec_argv; int ret; + bool all_user = false, all_kernel = false; struct option options[] = { OPT_CALLBACK('e', "event", &mem, "event", "event selector. use 'perf mem record -e list' to list available events", parse_record_events), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('U', "--all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('K', "--all-kernel", &all_kernel, "collect only kernel level data"), OPT_END() }; argc = parse_options(argc, argv, options, record_mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); - rec_argc = argc + 7; /* max number of arguments */ + rec_argc = argc + 9; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; @@ -103,6 +106,12 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = perf_mem_events__name(j); }; + if (all_user) + rec_argv[i++] = "--all-user"; + + if (all_kernel) + rec_argv[i++] = "--all-kernel"; + for (j = 0; j < argc; j++, i++) rec_argv[i] = argv[j]; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 515510ecc76a..410035c6e300 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -29,6 +29,7 @@ #include "util/data.h" #include "util/perf_regs.h" #include "util/auxtrace.h" +#include "util/tsc.h" #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" #include "util/llvm-utils.h" @@ -512,6 +513,15 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); +int __weak +perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, + struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + static int record__synthesize(struct record *rec) { struct perf_session *session = rec->session; @@ -549,6 +559,11 @@ static int record__synthesize(struct record *rec) } } + err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool, + process_synthesized_event, machine); + if (err) + goto out; + if (rec->opts.full_auxtrace) { err = perf_event__synthesize_auxtrace_info(rec->itr, tool, session, process_synthesized_event); diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 93ac724fb635..d309f4535a45 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -40,6 +40,11 @@ #include <sys/mman.h> #include <linux/futex.h> #include <linux/err.h> +#include <linux/seccomp.h> +#include <linux/filter.h> +#include <linux/audit.h> +#include <sys/ptrace.h> +#include <linux/random.h> /* For older distros: */ #ifndef MAP_STACK @@ -1001,6 +1006,69 @@ static const char *tioctls[] = { static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401); #endif /* defined(__i386__) || defined(__x86_64__) */ +static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg) +{ + int op = arg->val; + size_t printed = 0; + + switch (op) { +#define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break + P_SECCOMP_SET_MODE_OP(STRICT); + P_SECCOMP_SET_MODE_OP(FILTER); +#undef P_SECCOMP_SET_MODE_OP + default: printed = scnprintf(bf, size, "%#x", op); break; + } + + return printed; +} + +#define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op + +static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & SECCOMP_FILTER_FLAG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~SECCOMP_FILTER_FLAG_##n; \ + } + + P_FLAG(TSYNC); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags + +static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + +#define P_FLAG(n) \ + if (flags & GRND_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~GRND_##n; \ + } + + P_FLAG(RANDOM); + P_FLAG(NONBLOCK); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); + + return printed; +} + +#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags + #define STRARRAY(arg, name, array) \ .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \ .arg_parm = { [arg] = &strarray__##array, } @@ -1093,6 +1161,8 @@ static struct syscall_fmt { { .name = "getdents64", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, + { .name = "getrandom", .errmsg = true, + .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "getxattr", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, @@ -1234,6 +1304,9 @@ static struct syscall_fmt { .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_tgsigqueueinfo", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, + { .name = "seccomp", .errmsg = true, + .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */ + [1] = SCA_SECCOMP_FLAGS, /* flags */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "sendmmsg", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ @@ -1618,6 +1691,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, color_fprintf(trace->output, PERF_COLOR_RED, "LOST %" PRIu64 " events!\n", event->lost.lost); ret = machine__process_lost_event(machine, event, sample); + break; default: ret = machine__process_event(machine, event, sample); break; @@ -2326,6 +2400,23 @@ static bool skip_sample(struct trace *trace, struct perf_sample *sample) return false; } +static void trace__set_base_time(struct trace *trace, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + /* + * BPF events were not setting PERF_SAMPLE_TIME, so be more robust + * and don't use sample->time unconditionally, we may end up having + * some other event in the future without PERF_SAMPLE_TIME for good + * reason, i.e. we may not be interested in its timestamps, just in + * it taking place, picking some piece of information when it + * appears in our event stream (vfs_getname comes to mind). + */ + if (trace->base_time == 0 && !trace->full_time && + (evsel->attr.sample_type & PERF_SAMPLE_TIME)) + trace->base_time = sample->time; +} + static int trace__process_sample(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -2340,8 +2431,7 @@ static int trace__process_sample(struct perf_tool *tool, if (skip_sample(trace, sample)) return 0; - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample->time; + trace__set_base_time(trace, evsel, sample); if (handler) { ++trace->nr_events; @@ -2479,9 +2569,6 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st const u32 type = event->header.type; struct perf_evsel *evsel; - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample->time; - if (type != PERF_RECORD_SAMPLE) { trace__process_event(trace, trace->host, event, sample); return; @@ -2493,6 +2580,8 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st return; } + trace__set_base_time(trace, evsel, sample); + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && sample->raw_data == NULL) { fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 6461e02ab940..3573f315f955 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -92,6 +92,22 @@ error: return ret; } +static int use_arch_timestamp; + +static inline uint64_t +get_arch_timestamp(void) +{ +#if defined(__i386__) || defined(__x86_64__) + unsigned int low, high; + + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((uint64_t)high) << 32; +#else + return 0; +#endif +} + #define NSEC_PER_SEC 1000000000 static int perf_clk_id = CLOCK_MONOTONIC; @@ -107,6 +123,9 @@ perf_get_timestamp(void) struct timespec ts; int ret; + if (use_arch_timestamp) + return get_arch_timestamp(); + ret = clock_gettime(perf_clk_id, &ts); if (ret) return 0; @@ -203,6 +222,17 @@ perf_close_marker_file(void) munmap(marker_addr, pgsz); } +static void +init_arch_timestamp(void) +{ + char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP"); + + if (!str || !*str || !strcmp(str, "0")) + return; + + use_arch_timestamp = 1; +} + void *jvmti_open(void) { int pad_cnt; @@ -211,11 +241,17 @@ void *jvmti_open(void) int fd; FILE *fp; + init_arch_timestamp(); + /* * check if clockid is supported */ - if (!perf_get_timestamp()) - warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + if (!perf_get_timestamp()) { + if (use_arch_timestamp) + warnx("jvmti: arch timestamp not supported"); + else + warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + } memset(&header, 0, sizeof(header)); @@ -263,6 +299,9 @@ void *jvmti_open(void) header.timestamp = perf_get_timestamp(); + if (use_arch_timestamp) + header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP; + if (!fwrite(&header, sizeof(header), 1, fp)) { warn("jvmti: cannot write dumpfile header"); goto error; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index aaee0a782747..7b2df2b46525 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -549,6 +549,7 @@ int main(int argc, const char **argv) srandom(time(NULL)); perf_config(perf_default_config, NULL); + set_buildid_dir(NULL); /* get debugfs/tracefs mount point from /proc/mounts */ tracing_path_mount(); @@ -572,7 +573,6 @@ int main(int argc, const char **argv) } if (!prefixcmp(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT - set_buildid_dir(NULL); setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv, NULL); @@ -587,7 +587,6 @@ int main(int argc, const char **argv) argc--; handle_options(&argv, &argc, NULL); commit_pager_choice(); - set_buildid_dir(NULL); if (argc > 0) { if (!prefixcmp(argv[0], "--")) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 1ba628ed049a..449fe97a555f 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -37,6 +37,7 @@ perf-y += topology.o perf-y += cpumap.o perf-y += stat.o perf-y += event_update.o +perf-y += event-times.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index f2b1dcac45d3..93c467015e71 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -204,6 +204,10 @@ static struct test generic_tests[] = { .func = test__event_update, }, { + .desc = "Test events times", + .func = test__event_times, + }, + { .func = NULL, }, }; diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c new file mode 100644 index 000000000000..95fb744f6628 --- /dev/null +++ b/tools/perf/tests/event-times.c @@ -0,0 +1,236 @@ +#include <linux/compiler.h> +#include <string.h> +#include "tests.h" +#include "evlist.h" +#include "evsel.h" +#include "util.h" +#include "debug.h" +#include "thread_map.h" +#include "target.h" + +static int attach__enable_on_exec(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct target target = { + .uid = UINT_MAX, + }; + const char *argv[] = { "true", NULL, }; + char sbuf[STRERR_BUFSIZE]; + int err; + + pr_debug("attaching to spawned child, enable on exec\n"); + + err = perf_evlist__create_maps(evlist, &target); + if (err < 0) { + pr_debug("Not enough memory to create thread/cpu maps\n"); + return err; + } + + err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL); + if (err < 0) { + pr_debug("Couldn't run the workload!\n"); + return err; + } + + evsel->attr.enable_on_exec = 1; + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + return err; + } + + return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL; +} + +static int detach__enable_on_exec(struct perf_evlist *evlist) +{ + waitpid(evlist->workload.pid, NULL, 0); + return 0; +} + +static int attach__current_disabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct thread_map *threads; + int err; + + pr_debug("attaching to current thread as disabled\n"); + + threads = thread_map__new(-1, getpid(), UINT_MAX); + if (threads == NULL) { + pr_debug("thread_map__new\n"); + return -1; + } + + evsel->attr.disabled = 1; + + err = perf_evsel__open_per_thread(evsel, threads); + if (err) { + pr_debug("Failed to open event cpu-clock:u\n"); + return err; + } + + thread_map__put(threads); + return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL; +} + +static int attach__current_enabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct thread_map *threads; + int err; + + pr_debug("attaching to current thread as enabled\n"); + + threads = thread_map__new(-1, getpid(), UINT_MAX); + if (threads == NULL) { + pr_debug("failed to call thread_map__new\n"); + return -1; + } + + err = perf_evsel__open_per_thread(evsel, threads); + + thread_map__put(threads); + return err == 0 ? TEST_OK : TEST_FAIL; +} + +static int detach__disable(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + + return perf_evsel__enable(evsel); +} + +static int attach__cpu_disabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct cpu_map *cpus; + int err; + + pr_debug("attaching to CPU 0 as enabled\n"); + + cpus = cpu_map__new("0"); + if (cpus == NULL) { + pr_debug("failed to call cpu_map__new\n"); + return -1; + } + + evsel->attr.disabled = 1; + + err = perf_evsel__open_per_cpu(evsel, cpus); + if (err) { + if (err == -EACCES) + return TEST_SKIP; + + pr_debug("Failed to open event cpu-clock:u\n"); + return err; + } + + cpu_map__put(cpus); + return perf_evsel__enable(evsel); +} + +static int attach__cpu_enabled(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__last(evlist); + struct cpu_map *cpus; + int err; + + pr_debug("attaching to CPU 0 as enabled\n"); + + cpus = cpu_map__new("0"); + if (cpus == NULL) { + pr_debug("failed to call cpu_map__new\n"); + return -1; + } + + err = perf_evsel__open_per_cpu(evsel, cpus); + if (err == -EACCES) + return TEST_SKIP; + + cpu_map__put(cpus); + return err ? TEST_FAIL : TEST_OK; +} + +static int test_times(int (attach)(struct perf_evlist *), + int (detach)(struct perf_evlist *)) +{ + struct perf_counts_values count; + struct perf_evlist *evlist = NULL; + struct perf_evsel *evsel; + int err = -1, i; + + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("failed to create event list\n"); + goto out_err; + } + + err = parse_events(evlist, "cpu-clock:u", NULL); + if (err) { + pr_debug("failed to parse event cpu-clock:u\n"); + goto out_err; + } + + evsel = perf_evlist__last(evlist); + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + + err = attach(evlist); + if (err == TEST_SKIP) { + pr_debug(" SKIP : not enough rights\n"); + return err; + } + + TEST_ASSERT_VAL("failed to attach", !err); + + for (i = 0; i < 100000000; i++) { } + + TEST_ASSERT_VAL("failed to detach", !detach(evlist)); + + perf_evsel__read(evsel, 0, 0, &count); + + err = !(count.ena == count.run); + + pr_debug(" %s: ena %" PRIu64", run %" PRIu64"\n", + !err ? "OK " : "FAILED", + count.ena, count.run); + +out_err: + if (evlist) + perf_evlist__delete(evlist); + return !err ? TEST_OK : TEST_FAIL; +} + +/* + * This test creates software event 'cpu-clock' + * attaches it in several ways (explained below) + * and checks that enabled and running times + * match. + */ +int test__event_times(int subtest __maybe_unused) +{ + int err, ret = 0; + +#define _T(attach, detach) \ + err = test_times(attach, detach); \ + if (err && (ret == TEST_OK || ret == TEST_SKIP)) \ + ret = err; + + /* attach on newly spawned process after exec */ + _T(attach__enable_on_exec, detach__enable_on_exec) + /* attach on current process as enabled */ + _T(attach__current_enabled, detach__disable) + /* attach on current process as disabled */ + _T(attach__current_disabled, detach__disable) + /* attach on cpu as disabled */ + _T(attach__cpu_disabled, detach__disable) + /* attach on cpu as enabled */ + _T(attach__cpu_enabled, detach__disable) + +#undef _T + return ret; +} diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 82b2b5e6ba7c..0fc946989cf0 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -85,6 +85,7 @@ int test__synthesize_stat_config(int subtest); int test__synthesize_stat(int subtest); int test__synthesize_stat_round(int subtest); int test__event_update(int subtest); +int test__event_times(int subtest); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/util/Build b/tools/perf/util/Build index da48fd843438..85ceff357769 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -69,8 +69,7 @@ libperf-y += stat-shadow.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o -libperf-$(CONFIG_X86) += tsc.o -libperf-$(CONFIG_AUXTRACE) += tsc.o +libperf-y += tsc.o libperf-y += cloexec.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index ec164fe70718..c9169011e55e 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -940,6 +940,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; + synth_opts->initial_skip = 0; } /* @@ -1064,6 +1065,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, synth_opts->last_branch_sz = val; } break; + case 's': + synth_opts->initial_skip = strtoul(p, &endptr, 10); + if (p == endptr) + goto out_err; + p = endptr; + break; case ' ': case ',': break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 57ff31ecb8e4..767989e0e312 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -68,6 +68,7 @@ enum itrace_period_type { * @last_branch_sz: branch context size * @period: 'instructions' events period * @period_type: 'instructions' events period type + * @initial_skip: skip N events at the beginning. */ struct itrace_synth_opts { bool set; @@ -86,6 +87,7 @@ struct itrace_synth_opts { unsigned int last_branch_sz; unsigned long long period; enum itrace_period_type period_type; + unsigned long initial_skip; }; /** diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 4e727635476e..5c20d783423b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -377,6 +377,21 @@ const char *perf_config_dirname(const char *name, const char *value) return value; } +static int perf_buildid_config(const char *var, const char *value) +{ + /* same dir for all commands */ + if (!strcmp(var, "buildid.dir")) { + const char *dirname = perf_config_dirname(var, value); + + if (!dirname) + return -1; + strncpy(buildid_dir, dirname, MAXPATHLEN-1); + buildid_dir[MAXPATHLEN-1] = '\0'; + } + + return 0; +} + static int perf_default_core_config(const char *var __maybe_unused, const char *value __maybe_unused) { @@ -412,6 +427,9 @@ int perf_default_config(const char *var, const char *value, if (!prefixcmp(var, "llvm.")) return perf_llvm_config(var, value); + if (!prefixcmp(var, "buildid.")) + return perf_buildid_config(var, value); + /* Add other config variables here. */ return 0; } @@ -515,49 +533,18 @@ int config_error_nonbool(const char *var) return error("Missing value for '%s'", var); } -struct buildid_dir_config { - char *dir; -}; - -static int buildid_dir_command_config(const char *var, const char *value, - void *data) -{ - struct buildid_dir_config *c = data; - const char *v; - - /* same dir for all commands */ - if (!strcmp(var, "buildid.dir")) { - v = perf_config_dirname(var, value); - if (!v) - return -1; - strncpy(c->dir, v, MAXPATHLEN-1); - c->dir[MAXPATHLEN-1] = '\0'; - } - return 0; -} - -static void check_buildid_dir_config(void) -{ - struct buildid_dir_config c; - c.dir = buildid_dir; - perf_config(buildid_dir_command_config, &c); -} - void set_buildid_dir(const char *dir) { if (dir) scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir); - /* try config file */ - if (buildid_dir[0] == '\0') - check_buildid_dir_config(); - /* default to $HOME/.debug */ if (buildid_dir[0] == '\0') { - char *v = getenv("HOME"); - if (v) { + char *home = getenv("HOME"); + + if (home) { snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s", - v, DEBUG_CACHE_DIR); + home, DEBUG_CACHE_DIR); } else { strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1); } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dad55d04ffdd..b68959037688 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -45,6 +45,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_STAT] = "STAT", [PERF_RECORD_STAT_ROUND] = "STAT_ROUND", [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", + [PERF_RECORD_TIME_CONV] = "TIME_CONV", }; const char *perf_event__name(unsigned int id) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 6bb1c928350d..8d363d5e65a2 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -233,6 +233,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_STAT = 76, PERF_RECORD_STAT_ROUND = 77, PERF_RECORD_EVENT_UPDATE = 78, + PERF_RECORD_TIME_CONV = 79, PERF_RECORD_HEADER_MAX }; @@ -469,6 +470,13 @@ struct stat_round_event { u64 time; }; +struct time_conv_event { + struct perf_event_header header; + u64 time_shift; + u64 time_mult; + u64 time_zero; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -497,6 +505,7 @@ union perf_event { struct stat_config_event stat_config; struct stat_event stat; struct stat_round_event stat_round; + struct time_conv_event time_conv; }; void perf_event__print_totals(void); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 738ce226002b..3fd7c2c72f4a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -226,7 +226,8 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) perf_evsel__init(evsel, attr, idx); if (perf_evsel__is_bpf_output(evsel)) { - evsel->attr.sample_type |= PERF_SAMPLE_RAW; + evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | + PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD), evsel->attr.sample_period = 1; } diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 31c4641fe5ff..3d34c57dfbe2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1295,8 +1295,9 @@ static int hists__hierarchy_insert_entry(struct hists *hists, return ret; } -int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, - struct hist_entry *he) +static int hists__collapse_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index bec0cd660fbd..588596561cb3 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -199,8 +199,6 @@ int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); struct rb_root *hists__get_rotate_entries_in(struct hists *hists); -int hists__collapse_insert_entry(struct hists *hists, - struct rb_root *root, struct hist_entry *he); struct perf_hpp { char *buf; diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index abf1366e2a24..9df996085563 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -66,6 +66,7 @@ struct intel_bts { u64 branches_id; size_t branches_event_size; bool synth_needs_swap; + unsigned long num_events; }; struct intel_bts_queue { @@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, union perf_event event; struct perf_sample sample = { .ip = 0, }; + if (bts->synth_opts.initial_skip && + bts->num_events++ <= bts->synth_opts.initial_skip) + return 0; + event.sample.header.type = PERF_RECORD_SAMPLE; event.sample.header.misc = PERF_RECORD_MISC_USER; event.sample.header.size = sizeof(struct perf_event_header); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 407f11b97c8d..ddec87f6e616 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -100,6 +100,8 @@ struct intel_pt { u64 cyc_bit; u64 noretcomp_bit; unsigned max_non_turbo_ratio; + + unsigned long num_events; }; enum switch_state { @@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) return 0; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1029,6 +1035,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1087,6 +1097,10 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) union perf_event *event = ptq->event_buf; struct perf_sample sample = { .ip = 0, }; + if (pt->synth_opts.initial_skip && + pt->num_events++ < pt->synth_opts.initial_skip) + return 0; + event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = PERF_RECORD_MISC_USER; event->sample.header.size = sizeof(struct perf_event_header); @@ -1199,14 +1213,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; if (pt->sample_instructions && - (state->type & INTEL_PT_INSTRUCTION)) { + (state->type & INTEL_PT_INSTRUCTION) && + (!pt->synth_opts.initial_skip || + pt->num_events++ >= pt->synth_opts.initial_skip)) { err = intel_pt_synth_instruction_sample(ptq); if (err) return err; } if (pt->sample_transactions && - (state->type & INTEL_PT_TRANSACTION)) { + (state->type & INTEL_PT_TRANSACTION) && + (!pt->synth_opts.initial_skip || + pt->num_events++ >= pt->synth_opts.initial_skip)) { err = intel_pt_synth_transaction_sample(ptq); if (err) return err; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index ad0c0bb1fbc7..52fcef3074fe 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -17,6 +17,7 @@ #include "strlist.h" #include <elf.h> +#include "tsc.h" #include "session.h" #include "jit.h" #include "jitdump.h" @@ -33,6 +34,7 @@ struct jit_buf_desc { size_t bufsize; FILE *in; bool needs_bswap; /* handles cross-endianess */ + bool use_arch_timestamp; void *debug_data; size_t nr_debug_entries; uint32_t code_load_count; @@ -158,13 +160,16 @@ jit_open(struct jit_buf_desc *jd, const char *name) header.flags = bswap_64(header.flags); } + jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP; + if (verbose > 2) - pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n", + pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n", header.version, header.total_size, (unsigned long long)header.timestamp, header.pid, - header.elf_mach); + header.elf_mach, + jd->use_arch_timestamp); if (header.flags & JITDUMP_FLAGS_RESERVED) { pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", @@ -172,10 +177,15 @@ jit_open(struct jit_buf_desc *jd, const char *name) goto error; } + if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) { + pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n"); + goto error; + } + /* * validate event is using the correct clockid */ - if (jit_validate_events(jd->session)) { + if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) { pr_err("error, jitted code must be sampled with perf record -k 1\n"); goto error; } @@ -329,6 +339,23 @@ jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) return 0; } +static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) +{ + struct perf_tsc_conversion tc; + + if (!jd->use_arch_timestamp) + return timestamp; + + tc.time_shift = jd->session->time_conv.time_shift; + tc.time_mult = jd->session->time_conv.time_mult; + tc.time_zero = jd->session->time_conv.time_zero; + + if (!tc.time_mult) + return 0; + + return tsc_to_perf_time(timestamp, &tc); +} + static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) { struct perf_sample sample; @@ -410,7 +437,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) - id->time = jr->load.p.timestamp; + id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment @@ -499,7 +526,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) id->tid = tid; } if (jd->sample_type & PERF_SAMPLE_TIME) - id->time = jr->load.p.timestamp; + id->time = convert_timestamp(jd, jr->load.p.timestamp); /* * create pseudo sample to induce dso hit increment diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h index b66c1f503d9e..bcacd20d0c1c 100644 --- a/tools/perf/util/jitdump.h +++ b/tools/perf/util/jitdump.h @@ -23,9 +23,12 @@ #define JITHEADER_VERSION 1 enum jitdump_flags_bits { + JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT, JITDUMP_FLAGS_MAX_BIT, }; +#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT) + #define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \ (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index adef23b1352e..bf34468a99cb 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -602,14 +602,13 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - int w; + __u64 w = 0; + int fbit; - w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); - if (!w) - return 0; - if (w < 64) - return (1ULL << w) - 1; - return -1; + for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) + w |= (1ULL << fbit); + + return w; } /* diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b3aabc0d4eb0..1d160855cda9 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -31,6 +31,8 @@ #include <perl.h> #include "../../perf.h" +#include "../callchain.h" +#include "../machine.h" #include "../thread.h" #include "../event.h" #include "../trace-event.h" @@ -248,10 +250,78 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } +static SV *perl_process_callchain(struct perf_sample *sample, + struct perf_evsel *evsel, + struct addr_location *al) +{ + AV *list; + + list = newAV(); + if (!list) + goto exit; + + if (!symbol_conf.use_callchain || !sample->callchain) + goto exit; + + if (thread__resolve_callchain(al->thread, evsel, + sample, NULL, NULL, + PERF_MAX_STACK_DEPTH) != 0) { + pr_err("Failed to resolve callchain. Skipping\n"); + goto exit; + } + callchain_cursor_commit(&callchain_cursor); + + + while (1) { + HV *elem; + struct callchain_cursor_node *node; + node = callchain_cursor_current(&callchain_cursor); + if (!node) + break; + + elem = newHV(); + if (!elem) + goto exit; + + hv_stores(elem, "ip", newSVuv(node->ip)); + + if (node->sym) { + HV *sym = newHV(); + if (!sym) + goto exit; + hv_stores(sym, "start", newSVuv(node->sym->start)); + hv_stores(sym, "end", newSVuv(node->sym->end)); + hv_stores(sym, "binding", newSVuv(node->sym->binding)); + hv_stores(sym, "name", newSVpvn(node->sym->name, + node->sym->namelen)); + hv_stores(elem, "sym", newRV_noinc((SV*)sym)); + } + + if (node->map) { + struct map *map = node->map; + const char *dsoname = "[unknown]"; + if (map && map->dso && (map->dso->name || map->dso->long_name)) { + if (symbol_conf.show_kernel_path && map->dso->long_name) + dsoname = map->dso->long_name; + else if (map->dso->name) + dsoname = map->dso->name; + } + hv_stores(elem, "dso", newSVpv(dsoname,0)); + } + + callchain_cursor_advance(&callchain_cursor); + av_push(list, newRV_noinc((SV*)elem)); + } + +exit: + return newRV_noinc((SV*)list); +} + static void perl_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread) + struct addr_location *al) { + struct thread *thread = al->thread; struct event_format *event = evsel->tp_format; struct format_field *field; static char handler[256]; @@ -295,6 +365,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, XPUSHs(sv_2mortal(newSVuv(ns))); XPUSHs(sv_2mortal(newSViv(pid))); XPUSHs(sv_2mortal(newSVpv(comm, 0))); + XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al))); /* common fields other than pid can be accessed via xsub fns */ @@ -329,6 +400,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, XPUSHs(sv_2mortal(newSVuv(nsecs))); XPUSHs(sv_2mortal(newSViv(pid))); XPUSHs(sv_2mortal(newSVpv(comm, 0))); + XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al))); call_pv("main::trace_unhandled", G_SCALAR); } SPAGAIN; @@ -366,7 +438,7 @@ static void perl_process_event(union perf_event *event, struct perf_evsel *evsel, struct addr_location *al) { - perl_process_tracepoint(sample, evsel, al->thread); + perl_process_tracepoint(sample, evsel, al); perl_process_event_generic(event, sample, evsel); } @@ -490,7 +562,27 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "use Perf::Trace::Util;\n\n"); fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n"); - fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n\n"); + fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n"); + + + fprintf(ofp, "\n\ +sub print_backtrace\n\ +{\n\ + my $callchain = shift;\n\ + for my $node (@$callchain)\n\ + {\n\ + if(exists $node->{sym})\n\ + {\n\ + printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\ + }\n\ + else\n\ + {\n\ + printf( \"\\t[\\%%x]\\n\", $node{ip});\n\ + }\n\ + }\n\ +}\n\n\ +"); + while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name); @@ -502,7 +594,8 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "$common_secs, "); fprintf(ofp, "$common_nsecs,\n"); fprintf(ofp, "\t $common_pid, "); - fprintf(ofp, "$common_comm,\n\t "); + fprintf(ofp, "$common_comm, "); + fprintf(ofp, "$common_callchain,\n\t "); not_first = 0; count = 0; @@ -519,7 +612,7 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "\tprint_header($event_name, $common_cpu, " "$common_secs, $common_nsecs,\n\t " - "$common_pid, $common_comm);\n\n"); + "$common_pid, $common_comm, $common_callchain);\n\n"); fprintf(ofp, "\tprintf(\""); @@ -581,17 +674,22 @@ static int perl_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "$%s", f->name); } - fprintf(ofp, ");\n"); + fprintf(ofp, ");\n\n"); + + fprintf(ofp, "\tprint_backtrace($common_callchain);\n"); + fprintf(ofp, "}\n\n"); } fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, " "$common_cpu, $common_secs, $common_nsecs,\n\t " - "$common_pid, $common_comm) = @_;\n\n"); + "$common_pid, $common_comm, $common_callchain) = @_;\n\n"); fprintf(ofp, "\tprint_header($event_name, $common_cpu, " "$common_secs, $common_nsecs,\n\t $common_pid, " - "$common_comm);\n}\n\n"); + "$common_comm, $common_callchain);\n"); + fprintf(ofp, "\tprint_backtrace($common_callchain);\n"); + fprintf(ofp, "}\n\n"); fprintf(ofp, "sub print_header\n{\n" "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4abd85c6346d..ef370557fb9a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -409,6 +409,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->stat = process_stat_stub; if (tool->stat_round == NULL) tool->stat_round = process_stat_round_stub; + if (tool->time_conv == NULL) + tool->time_conv = process_event_op2_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -794,6 +796,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_STAT] = perf_event__stat_swap, [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap, [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap, + [PERF_RECORD_TIME_CONV] = perf_event__all64_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; @@ -1341,6 +1344,9 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->stat(tool, event, session); case PERF_RECORD_STAT_ROUND: return tool->stat_round(tool, event, session); + case PERF_RECORD_TIME_CONV: + session->time_conv = event->time_conv; + return tool->time_conv(tool, event, session); default: return -EINVAL; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5f792e35d4c1..f96fc9e8c52e 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -26,6 +26,7 @@ struct perf_session { struct itrace_synth_opts *itrace_synth_opts; struct list_head auxtrace_index; struct trace_event tevent; + struct time_conv_event time_conv; bool repipe; bool one_mmap; void *one_mmap_addr; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 55de4cffcd4e..ac2590a3de2d 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -57,6 +57,7 @@ struct perf_tool { id_index, auxtrace_info, auxtrace_error, + time_conv, thread_map, cpu_map, stat_config, diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index a8b78f1b3243..d5b11e2b85e0 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -3,10 +3,29 @@ #include <linux/types.h> -#include "../arch/x86/util/tsc.h" +#include "event.h" + +struct perf_tsc_conversion { + u16 time_shift; + u32 time_mult; + u64 time_zero; +}; +struct perf_event_mmap_page; + +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc); u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); +struct perf_event_mmap_page; +struct perf_tool; +struct machine; + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine); + #endif |