83 files changed, 2253 insertions, 1112 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index ed42cb65a19b..66de4da2d244 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3086,6 +3086,7 @@ M: Stephen Boyd <sboyd@codeaurora.org> L: linux-clk@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git S: Maintained +F: Documentation/devicetree/bindings/clock/ F: drivers/clk/ X: drivers/clk/clkdev.c F: include/linux/clk-pr* diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index 5766ce2be32b..8409cab3f760 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -547,7 +547,7 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) init.name = dev_name(cpu_dev); init.ops = &clk_spc_ops; - init.flags = CLK_IS_ROOT | CLK_GET_RATE_NOCACHE; + init.flags = CLK_GET_RATE_NOCACHE; init.num_parents = 0; return devm_clk_register(cpu_dev, &spc->hw); diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c index c50ea76ba66c..6081fbd75330 100644 --- a/arch/powerpc/platforms/512x/clock-commonclk.c +++ b/arch/powerpc/platforms/512x/clock-commonclk.c @@ -221,7 +221,7 @@ static bool soc_has_mclk_mux0_canin(void) /* convenience wrappers around the common clk API */ static inline struct clk *mpc512x_clk_fixed(const char *name, int rate) { - return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate); + return clk_register_fixed_rate(NULL, name, NULL, 0, rate); } static inline struct clk *mpc512x_clk_factor( diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 84fb984f29c1..85c85eb3e245 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -172,7 +172,7 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i, if (rc < 0) goto out; - skip = roundup(cprm->file->f_pos - total + sz, 4) - cprm->file->f_pos; + skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; if (!dump_skip(cprm, skip)) goto Eio; out: diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 33787ee817f0..929655db5084 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1622,6 +1622,29 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, cha } EXPORT_SYMBOL_GPL(events_sysfs_show); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_ht_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_ht_attr, attr); + + /* + * Report conditional events depending on Hyper-Threading. + * + * This is overly conservative as usually the HT special + * handling is not needed if the other CPU thread is idle. + * + * Note this does not (and cannot) handle the case when thread + * siblings are invisible, for example with virtualization + * if they are owned by some other guest. The user tool + * has to re-read when a thread sibling gets onlined later. + */ + return sprintf(page, "%s", + topology_max_smt_threads() > 1 ? 
+ pmu_attr->event_str_ht : + pmu_attr->event_str_noht); +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7c666958a625..5081b4cdad0d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -177,7 +177,7 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; -struct event_constraint intel_skl_event_constraints[] = { +static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -186,10 +186,8 @@ struct event_constraint intel_skl_event_constraints[] = { }; static struct extra_reg intel_knl_extra_regs[] __read_mostly = { - INTEL_UEVENT_EXTRA_REG(0x01b7, - MSR_OFFCORE_RSP_0, 0x7f9ffbffffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, - MSR_OFFCORE_RSP_1, 0x3f9ffbffffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1), EVENT_EXTRA_END }; @@ -225,14 +223,51 @@ EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3"); EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3"); EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2"); -struct attribute *nhm_events_attrs[] = { +static struct attribute *nhm_events_attrs[] = { EVENT_PTR(mem_ld_nhm), NULL, }; -struct attribute *snb_events_attrs[] = { +/* + * topdown events for Intel Core CPUs. + * + * The events are all in slots, which is a free slot in a 4 wide + * pipeline. Some events are already reported in slots, for cycle + * events we multiply by the pipeline width (4). + * + * With Hyper Threading on, topdown metrics are either summed or averaged + * between the threads of a core: (count_t0 + count_t1). 
+ * + * For the average case the metric is always scaled to pipeline width, + * so we use factor 2 ((count_t0 + count_t1) / 2 * 4) + */ + +EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots, + "event=0x3c,umask=0x0", /* cpu_clk_unhalted.thread */ + "event=0x3c,umask=0x0,any=1"); /* cpu_clk_unhalted.thread_any */ +EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2"); +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued, + "event=0xe,umask=0x1"); /* uops_issued.any */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired, + "event=0xc2,umask=0x2"); /* uops_retired.retire_slots */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles, + "event=0x9c,umask=0x1"); /* idq_uops_not_delivered_core */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles, + "event=0xd,umask=0x3,cmask=1", /* int_misc.recovery_cycles */ + "event=0xd,umask=0x3,cmask=1,any=1"); /* int_misc.recovery_cycles_any */ +EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale, + "4", "2"); + +static struct attribute *snb_events_attrs[] = { EVENT_PTR(mem_ld_snb), EVENT_PTR(mem_st_snb), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL, }; @@ -258,7 +293,7 @@ static struct event_constraint intel_hsw_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_bdw_event_constraints[] = { +static struct event_constraint intel_bdw_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ @@ -1332,6 +1367,29 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c"); +EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2"); +/* no_alloc_cycles.not_delivered */ +EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm, + "event=0xca,umask=0x50"); +EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm, + "event=0xc2,umask=0x10"); +/* uops_retired.all */ +EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm, + "event=0xc2,umask=0x10"); + +static struct attribute *slm_events_attrs[] = { + EVENT_PTR(td_total_slots_slm), + EVENT_PTR(td_total_slots_scale_slm), + EVENT_PTR(td_fetch_bubbles_slm), + EVENT_PTR(td_fetch_bubbles_scale_slm), + EVENT_PTR(td_slots_issued_slm), + EVENT_PTR(td_slots_retired_slm), + NULL +}; + static struct extra_reg intel_slm_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ @@ -3437,6 +3495,13 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(cycles_ct), EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), NULL }; @@ -3587,6 +3652,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints; x86_pmu.extra_regs = intel_slm_extra_regs; x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.cpu_events = slm_events_attrs; pr_cont("Silvermont events, "); break; @@ -3805,6 +3871,12 @@ __init int intel_pmu_init(void) 
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_skl(); + /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */ + event_attr_td_recovery_bubbles.event_str_noht = + "event=0xd,umask=0x1,cmask=1"; + event_attr_td_recovery_bubbles.event_str_ht = + "event=0xd,umask=0x1,cmask=1,any=1"; + x86_pmu.event_constraints = intel_skl_event_constraints; x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints; x86_pmu.extra_regs = intel_skl_extra_regs; @@ -3917,16 +3989,14 @@ __init int intel_pmu_init(void) */ static __init int fixup_ht_bug(void) { - int cpu = smp_processor_id(); - int w, c; + int c; /* * problem not present on this CPU model, nothing to do */ if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED)) return 0; - w = cpumask_weight(topology_sibling_cpumask(cpu)); - if (w > 1) { + if (topology_max_smt_threads() > 1) { pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n"); return 0; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 99c4bab123cd..e30eef4f29a6 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -714,7 +714,7 @@ static void cleanup_rapl_pmus(void) int i; for (i = 0; i < rapl_pmus->maxpkg; i++) - kfree(rapl_pmus->pmus + i); + kfree(rapl_pmus->pmus[i]); kfree(rapl_pmus); } diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index fce74062d981..65490589e52e 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -882,7 +882,7 @@ uncore_types_init(struct intel_uncore_type **types, bool setid) static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct intel_uncore_type *type; - struct intel_uncore_pmu *pmu; + struct intel_uncore_pmu *pmu = NULL; struct intel_uncore_box *box; int phys_id, pkg, ret; @@ -903,20 +903,37 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id } type = uncore_pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; + /* - * for performance monitoring unit with multiple boxes, - * each box has a different function id. + * Some platforms, e.g. Knights Landing, use a common PCI device ID + * for multiple instances of an uncore PMU device type. We should check + * PCI slot and func to indicate the uncore box. */ - pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; - /* Knights Landing uses a common PCI device ID for multiple instances of - * an uncore PMU device type. There is only one entry per device type in - * the knl_uncore_pci_ids table inspite of multiple devices present for - * some device types. Hence PCI device idx would be 0 for all devices. - * So increment pmu pointer to point to an unused array element. - */ - if (boot_cpu_data.x86_model == 87) { - while (pmu->func_id >= 0) - pmu++; + if (id->driver_data & ~0xffff) { + struct pci_driver *pci_drv = pdev->driver; + const struct pci_device_id *ids = pci_drv->id_table; + unsigned int devfn; + + while (ids && ids->vendor) { + if ((ids->vendor == pdev->vendor) && + (ids->device == pdev->device)) { + devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(ids->driver_data), + UNCORE_PCI_DEV_FUNC(ids->driver_data)); + if (devfn == pdev->devfn) { + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(ids->driver_data)]; + break; + } + } + ids++; + } + if (pmu == NULL) + return -ENODEV; + } else { + /* + * for performance monitoring unit with multiple boxes, + * each box has a different function id. 
+ */ + pmu = &type->pmus[UNCORE_PCI_DEV_IDX(id->driver_data)]; } if (WARN_ON_ONCE(pmu->boxes[pkg] != NULL)) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 79766b9a3580..66c3a3657a10 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -15,7 +15,11 @@ #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC #define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ + ((dev << 24) | (func << 16) | (type << 8) | idx) #define UNCORE_PCI_DEV_DATA(type, idx) ((type << 8) | idx) +#define UNCORE_PCI_DEV_DEV(data) ((data >> 24) & 0xff) +#define UNCORE_PCI_DEV_FUNC(data) ((data >> 16) & 0xff) #define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff) #define UNCORE_PCI_DEV_IDX(data) (data & 0xff) #define UNCORE_EXTRA_PCI_DEV 0xff diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index b2625867ebd1..7336e55c248c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -2164,21 +2164,101 @@ static struct intel_uncore_type *knl_pci_uncores[] = { */ static const struct pci_device_id knl_uncore_pci_ids[] = { - { /* MC UClk */ + { /* MC0 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 0, KNL_PCI_UNCORE_MC_UCLK, 0), }, - { /* MC DClk Channel */ + { /* MC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7841), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 0, KNL_PCI_UNCORE_MC_UCLK, 1), + }, + { /* MC0 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 2, KNL_PCI_UNCORE_MC_DCLK, 0), + }, + { /* MC0 DClk CH 1 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 3, KNL_PCI_UNCORE_MC_DCLK, 1), + }, + { /* MC0 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 4, KNL_PCI_UNCORE_MC_DCLK, 2), + }, + { /* MC1 DClk CH 0 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 2, KNL_PCI_UNCORE_MC_DCLK, 3), + }, + { /* MC1 DClk CH 1 */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_MC_DCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 3, KNL_PCI_UNCORE_MC_DCLK, 4), + }, + { /* MC1 DClk CH 2 */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7843), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 4, KNL_PCI_UNCORE_MC_DCLK, 5), + }, + { /* EDC0 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, KNL_PCI_UNCORE_EDC_UCLK, 0), + }, + { /* EDC1 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, KNL_PCI_UNCORE_EDC_UCLK, 1), + }, + { /* EDC2 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(17, 0, KNL_PCI_UNCORE_EDC_UCLK, 2), + }, + { /* EDC3 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, KNL_PCI_UNCORE_EDC_UCLK, 3), }, - { /* EDC UClk */ + { /* EDC4 UClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_UCLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(19, 0, KNL_PCI_UNCORE_EDC_UCLK, 4), + }, + { /* EDC5 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(20, 0, KNL_PCI_UNCORE_EDC_UCLK, 5), + }, + { /* EDC6 UClk */ + 
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 0, KNL_PCI_UNCORE_EDC_UCLK, 6), + }, + { /* EDC7 UClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7833), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 0, KNL_PCI_UNCORE_EDC_UCLK, 7), + }, + { /* EDC0 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(24, 2, KNL_PCI_UNCORE_EDC_ECLK, 0), + }, + { /* EDC1 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(25, 2, KNL_PCI_UNCORE_EDC_ECLK, 1), + }, + { /* EDC2 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(26, 2, KNL_PCI_UNCORE_EDC_ECLK, 2), + }, + { /* EDC3 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(27, 2, KNL_PCI_UNCORE_EDC_ECLK, 3), + }, + { /* EDC4 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(28, 2, KNL_PCI_UNCORE_EDC_ECLK, 4), + }, + { /* EDC5 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(29, 2, KNL_PCI_UNCORE_EDC_ECLK, 5), + }, + { /* EDC6 EClk */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(30, 2, KNL_PCI_UNCORE_EDC_ECLK, 6), }, - { /* EDC EClk */ + { /* EDC7 EClk */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7835), - .driver_data = UNCORE_PCI_DEV_DATA(KNL_PCI_UNCORE_EDC_ECLK, 0), + .driver_data = UNCORE_PCI_DEV_FULL_DATA(31, 2, KNL_PCI_UNCORE_EDC_ECLK, 7), }, { /* M2PCIe */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7817), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8bd764df815d..e2d7285a2dac 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -668,6 +668,14 @@ static struct perf_pmu_events_attr event_attr_##v = { \ .event_str = str, \ }; +#define EVENT_ATTR_STR_HT(_name, v, noht, ht) \ +static struct perf_pmu_events_ht_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_ht_sysfs_show, NULL),\ + .id = 0, \ + .event_str_noht = noht, \ + .event_str_ht = ht, \ +} + extern struct x86_pmu x86_pmu __read_mostly; static inline bool x86_pmu_has_lbr_callstack(void) @@ -803,6 +811,8 @@ struct attribute **merge_attr(struct attribute **a, struct attribute **b); ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); #ifdef CONFIG_CPU_SUP_AMD diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 7f991bd5031b..e346572841a0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -129,6 +129,14 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu); extern unsigned int __max_logical_packages; #define topology_max_packages() (__max_logical_packages) + +extern int __max_smt_threads; + +static inline int topology_max_smt_threads(void) +{ + return __max_smt_threads; +} + int topology_update_package_map(unsigned int apicid, unsigned int cpu); extern int topology_phys_to_logical_pkg(unsigned int pkg); #else @@ -136,6 +144,7 @@ extern int topology_phys_to_logical_pkg(unsigned int pkg); static inline int topology_update_package_map(unsigned int apicid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } +static inline int topology_max_smt_threads(void) { return 1; } #endif static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git 
a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..2ed0ec1353f8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -105,6 +105,9 @@ static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +/* Maximum number of SMT threads on any online core */ +int __max_smt_threads __read_mostly; + static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { unsigned long flags; @@ -493,7 +496,7 @@ void set_cpu_sibling_map(int cpu) bool has_mp = has_smt || boot_cpu_data.x86_max_cores > 1; struct cpuinfo_x86 *c = &cpu_data(cpu); struct cpuinfo_x86 *o; - int i; + int i, threads; cpumask_set_cpu(cpu, cpu_sibling_setup_mask); @@ -550,6 +553,10 @@ void set_cpu_sibling_map(int cpu) if (match_die(c, o) && !topology_same_node(c, o)) primarily_use_numa_for_topology(); } + + threads = cpumask_weight(topology_sibling_cpumask(cpu)); + if (threads > __max_smt_threads) + __max_smt_threads = threads; } /* maps the cpu to the sched domain representing multi-core */ @@ -1441,6 +1448,21 @@ __init void prefill_possible_map(void) #ifdef CONFIG_HOTPLUG_CPU +/* Recompute SMT state for all CPUs on offline */ +static void recompute_smt_state(void) +{ + int max_threads, cpu; + + max_threads = 0; + for_each_online_cpu (cpu) { + int threads = cpumask_weight(topology_sibling_cpumask(cpu)); + + if (threads > max_threads) + max_threads = threads; + } + __max_smt_threads = max_threads; +} + static void remove_siblinginfo(int cpu) { int sibling; @@ -1465,6 +1487,7 @@ static void remove_siblinginfo(int cpu) c->phys_proc_id = 0; c->cpu_core_id = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); + recompute_smt_state(); } static void remove_cpu_from_maps(int cpu) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 53ddba26578c..98efbfcdb503 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -175,6 +175,7 @@ config COMMON_CLK_KEYSTONE config COMMON_CLK_NXP def_bool COMMON_CLK && (ARCH_LPC18XX || ARCH_LPC32XX) select REGMAP_MMIO if ARCH_LPC32XX + select MFD_SYSCON if ARCH_LPC18XX ---help--- Support for clock providers on NXP platforms. 
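The x86/perf changes above combine into a single mechanism: one sysfs "events" attribute whose contents depend on whether any online core is running more than one SMT thread, as tracked by the new __max_smt_threads / topology_max_smt_threads(). Below is a condensed userspace sketch of that selection logic; the struct layout and event strings mirror the patch, while the main() harness and the hard-coded max_smt_threads value are illustrative assumptions, not kernel code:

#include <stdio.h>

/* Mirrors struct perf_pmu_events_ht_attr, minus the embedded
 * device_attribute, so the sketch builds as a plain C program. */
struct events_ht_attr {
	const char *name;
	const char *event_str_ht;	/* used while SMT is on somewhere */
	const char *event_str_noht;	/* used while SMT is off everywhere */
};

/* Stand-in for topology_max_smt_threads(); the kernel recomputes this
 * from the sibling masks as CPUs come online and go offline. */
static int max_smt_threads = 2;

/* Same decision as events_ht_sysfs_show() in the patch. */
static const char *events_ht_show(const struct events_ht_attr *attr)
{
	return max_smt_threads > 1 ? attr->event_str_ht
				   : attr->event_str_noht;
}

int main(void)
{
	/* The EVENT_ATTR_STR_HT(topdown-total-slots, ...) instance. */
	struct events_ht_attr td_total_slots = {
		.name		= "topdown-total-slots",
		.event_str_noht	= "event=0x3c,umask=0x0",
		.event_str_ht	= "event=0x3c,umask=0x0,any=1",
	};

	printf("%s=%s\n", td_total_slots.name,
	       events_ht_show(&td_total_slots));
	return 0;
}

With SMT on, the any=1 qualifier counts unhalted cycles for both hardware threads of the core, which is why the companion topdown-total-slots.scale attribute drops from 4 to 2.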
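The Knights Landing uncore fix works because driver_data can now carry the expected PCI slot and function alongside the PMU type and index, letting uncore_pci_probe() map each of the aliased device IDs to the right PMU instance. A standalone sketch of the encoding, using the macros added to uncore.h verbatim; the type/idx values (1, 1) stand in for the real KNL_PCI_UNCORE_MC_UCLK enum value, and PCI_DEVFN() is copied from the kernel's uapi pci.h:

#include <stdio.h>

/* Copied from arch/x86/events/intel/uncore.h in this patch. */
#define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx)	\
	((dev << 24) | (func << 16) | (type << 8) | idx)
#define UNCORE_PCI_DEV_DEV(data)	((data >> 24) & 0xff)
#define UNCORE_PCI_DEV_FUNC(data)	((data >> 16) & 0xff)
#define UNCORE_PCI_DEV_TYPE(data)	((data >> 8) & 0xff)
#define UNCORE_PCI_DEV_IDX(data)	(data & 0xff)

/* PCI_DEVFN() as defined in include/uapi/linux/pci.h. */
#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))

int main(void)
{
	/* MC1 UClk table entry: slot 11, function 0 (type/idx assumed). */
	unsigned long data = UNCORE_PCI_DEV_FULL_DATA(11, 0, 1, 1);

	/* uncore_pci_probe() rebuilds devfn from the matching table
	 * entry and compares it against pdev->devfn. */
	unsigned int devfn = PCI_DEVFN(UNCORE_PCI_DEV_DEV(data),
				       UNCORE_PCI_DEV_FUNC(data));

	printf("data=0x%08lx dev=%lu func=%lu type=%lu idx=%lu devfn=0x%02x\n",
	       data, UNCORE_PCI_DEV_DEV(data), UNCORE_PCI_DEV_FUNC(data),
	       UNCORE_PCI_DEV_TYPE(data), UNCORE_PCI_DEV_IDX(data), devfn);
	return 0;
}

The probe path only takes the devfn-matching branch when the upper bits of driver_data are non-zero (the id->driver_data & ~0xffff test), so entries still using plain UNCORE_PCI_DEV_DATA() keep their old behaviour.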
diff --git a/drivers/clk/microchip/clk-pic32mzda.c b/drivers/clk/microchip/clk-pic32mzda.c index 020a29acc5b0..51f54380474b 100644 --- a/drivers/clk/microchip/clk-pic32mzda.c +++ b/drivers/clk/microchip/clk-pic32mzda.c @@ -180,15 +180,15 @@ static int pic32mzda_clk_probe(struct platform_device *pdev) /* register fixed rate clocks */ clks[POSCCLK] = clk_register_fixed_rate(&pdev->dev, "posc_clk", NULL, - CLK_IS_ROOT, 24000000); + 0, 24000000); clks[FRCCLK] = clk_register_fixed_rate(&pdev->dev, "frc_clk", NULL, - CLK_IS_ROOT, 8000000); + 0, 8000000); clks[BFRCCLK] = clk_register_fixed_rate(&pdev->dev, "bfrc_clk", NULL, - CLK_IS_ROOT, 8000000); + 0, 8000000); clks[LPRCCLK] = clk_register_fixed_rate(&pdev->dev, "lprc_clk", NULL, - CLK_IS_ROOT, 32000); + 0, 32000); clks[UPLLCLK] = clk_register_fixed_rate(&pdev->dev, "usbphy_clk", NULL, - CLK_IS_ROOT, 24000000); + 0, 24000000); /* fixed rate (optional) clock */ if (of_find_property(np, "microchip,pic32mzda-sosc", NULL)) { pr_info("pic32-clk: dt requests SOSC.\n"); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 6aa256b0a1ed..c3ee3ad98a63 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -565,7 +565,8 @@ void edac_mc_reset_delay_period(unsigned long value) list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - edac_mod_work(&mci->work, value); + if (mci->op_state == OP_RUNNING_POLL) + edac_mod_work(&mci->work, value); } mutex_unlock(&mem_ctls_mutex); } diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index b4d0bf6534cf..6744d88bdea8 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -239,8 +239,11 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = { { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc }, }; -#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19) -#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14) +#define RIR_RNK_TGT(type, reg) (((type) == BROADWELL) ? \ + GET_BITFIELD(reg, 20, 23) : GET_BITFIELD(reg, 16, 19)) + +#define RIR_OFFSET(type, reg) (((type) == HASWELL || (type) == BROADWELL) ? \ + GET_BITFIELD(reg, 2, 15) : GET_BITFIELD(reg, 2, 14)) /* Device 16, functions 2-7 */ @@ -326,6 +329,7 @@ struct pci_id_descr { struct pci_id_table { const struct pci_id_descr *descr; int n_devs; + enum type type; }; struct sbridge_dev { @@ -394,9 +398,14 @@ static const struct pci_id_descr pci_dev_descr_sbridge[] = { { PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0) }, }; -#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) } +#define PCI_ID_TABLE_ENTRY(A, T) { \ + .descr = A, \ + .n_devs = ARRAY_SIZE(A), \ + .type = T \ +} + static const struct pci_id_table pci_dev_descr_sbridge_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge), + PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE), {0,} /* 0 terminated list. */ }; @@ -463,7 +472,7 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = { }; static const struct pci_id_table pci_dev_descr_ibridge_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge), + PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE), {0,} /* 0 terminated list. */ }; @@ -536,7 +545,7 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = { }; static const struct pci_id_table pci_dev_descr_haswell_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell), + PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL), {0,} /* 0 terminated list. 
*/ }; @@ -580,7 +589,7 @@ static const struct pci_id_descr pci_dev_descr_knl[] = { }; static const struct pci_id_table pci_dev_descr_knl_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_knl), + PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING), {0,} }; @@ -648,7 +657,7 @@ static const struct pci_id_descr pci_dev_descr_broadwell[] = { }; static const struct pci_id_table pci_dev_descr_broadwell_table[] = { - PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell), + PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL), {0,} /* 0 terminated list. */ }; @@ -1894,14 +1903,14 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pci_read_config_dword(pvt->pci_tad[i], rir_offset[j][k], ®); - tmp_mb = RIR_OFFSET(reg) << 6; + tmp_mb = RIR_OFFSET(pvt->info.type, reg) << 6; gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", i, j, k, gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, - (u32)RIR_RNK_TGT(reg), + (u32)RIR_RNK_TGT(pvt->info.type, reg), reg); } } @@ -2234,7 +2243,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, pci_read_config_dword(pvt->pci_tad[ch_add + base_ch], rir_offset[n_rir][idx], ®); - *rank = RIR_RNK_TGT(reg); + *rank = RIR_RNK_TGT(pvt->info.type, reg); edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n", n_rir, @@ -3357,12 +3366,12 @@ fail0: #define ICPU(model, table) \ { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table } -/* Order here must match "enum type" */ static const struct x86_cpu_id sbridge_cpuids[] = { ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */ ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */ ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */ ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */ + ICPU(0x56, pci_dev_descr_broadwell_table), /* BROADWELL-DE */ ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */ { } }; @@ -3398,7 +3407,7 @@ static int sbridge_probe(const struct x86_cpu_id *id) mc, mc + 1, num_mc); sbridge_dev->mc = mc++; - rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids); + rc = sbridge_register_mci(sbridge_dev, ptable->type); if (unlikely(rc < 0)) goto fail1; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e158b22ef32f..a7a28110dc80 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2275,7 +2275,7 @@ static int elf_core_dump(struct coredump_params *cprm) goto end_coredump; /* Align to page */ - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; for (i = 0, vma = first_vma(current, gate_vma); vma != NULL; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 71ade0e556b7..203589311bf8 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1787,7 +1787,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) goto end_coredump; } - if (!dump_skip(cprm, dataoff - cprm->file->f_pos)) + if (!dump_skip(cprm, dataoff - cprm->pos)) goto end_coredump; if (!elf_fdpic_dump_segments(cprm)) diff --git a/fs/coredump.c b/fs/coredump.c index 38a7ab87e10a..281b768000e6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -794,6 +794,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) return 0; file->f_pos = pos; cprm->written += n; + cprm->pos += n; nr -= n; } return 1; @@ -808,6 +809,7 @@ int dump_skip(struct coredump_params *cprm, size_t nr) if (dump_interrupted() || file->f_op->llseek(file, nr, SEEK_CUR) < 0) return 0; + cprm->pos += nr; return 1; } else { while (nr > 
PAGE_SIZE) { @@ -822,7 +824,7 @@ EXPORT_SYMBOL(dump_skip); int dump_align(struct coredump_params *cprm, int align) { - unsigned mod = cprm->file->f_pos & (align - 1); + unsigned mod = cprm->pos & (align - 1); if (align & (align - 1)) return 0; return mod ? dump_skip(cprm, align - mod) : 1; diff --git a/fs/dcache.c b/fs/dcache.c index ad4a542e9bab..817c243c1ff1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1636,7 +1636,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) struct dentry *dentry = __d_alloc(parent->d_sb, name); if (!dentry) return NULL; - + dentry->d_flags |= DCACHE_RCUACCESS; spin_lock(&parent->d_lock); /* * don't need child lock because it is not subject @@ -2358,7 +2358,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); hlist_bl_lock(b); - entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); } @@ -2843,6 +2842,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { /* splicing a tree */ + dentry->d_flags |= DCACHE_RCUACCESS; dentry->d_parent = target->d_parent; target->d_parent = target; list_del_init(&target->d_child); diff --git a/fs/namei.c b/fs/namei.c index 6a82fb7e2127..70580ab1445c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3030,9 +3030,13 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (*opened & FILE_CREATED) fsnotify_create(dir, dentry); - path->dentry = dentry; - path->mnt = nd->path.mnt; - return 1; + if (unlikely(d_is_negative(dentry))) { + error = -ENOENT; + } else { + path->dentry = dentry; + path->mnt = nd->path.mnt; + return 1; + } } } dput(dentry); @@ -3201,9 +3205,7 @@ static int do_last(struct nameidata *nd, int acc_mode = op->acc_mode; unsigned seq; struct inode *inode; - struct path save_parent = { .dentry = NULL, .mnt = NULL }; struct path path; - bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -3246,7 +3248,6 @@ static int do_last(struct nameidata *nd, return -EISDIR; } -retry_lookup: if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { error = mnt_want_write(nd->path.mnt); if (!error) @@ -3298,6 +3299,10 @@ retry_lookup: got_write = false; } + error = follow_managed(&path, nd); + if (unlikely(error < 0)) + return error; + if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; @@ -3313,10 +3318,6 @@ retry_lookup: return -EEXIST; } - error = follow_managed(&path, nd); - if (unlikely(error < 0)) - return error; - seq = 0; /* out of RCU mode, so the value doesn't matter */ inode = d_backing_inode(path.dentry); finish_lookup: @@ -3327,23 +3328,14 @@ finish_lookup: if (unlikely(error)) return error; - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { - path_to_nameidata(&path, nd); - } else { - save_parent.dentry = nd->path.dentry; - save_parent.mnt = mntget(path.mnt); - nd->path.dentry = path.dentry; - - } + path_to_nameidata(&path, nd); nd->inode = inode; nd->seq = seq; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... 
*/ finish_open: error = complete_walk(nd); - if (error) { - path_put(&save_parent); + if (error) return error; - } audit_inode(nd->name, nd->path.dentry, 0); error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) @@ -3366,13 +3358,9 @@ finish_open_created: goto out; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); - if (!error) { - *opened |= FILE_OPENED; - } else { - if (error == -EOPENSTALE) - goto stale_open; + if (error) goto out; - } + *opened |= FILE_OPENED; opened: error = open_check_o_direct(file); if (!error) @@ -3388,26 +3376,7 @@ out: } if (got_write) mnt_drop_write(nd->path.mnt); - path_put(&save_parent); return error; - -stale_open: - /* If no saved parent or already retried then can't retry */ - if (!save_parent.dentry || retried) - goto out; - - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (got_write) { - mnt_drop_write(nd->path.mnt); - got_write = false; - } - retried = true; - goto retry_lookup; } static int do_tmpfile(struct nameidata *nd, unsigned flags, diff --git a/fs/namespace.c b/fs/namespace.c index 4fb1691b4355..a7ec92c051f5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2409,8 +2409,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; } if (type->fs_flags & FS_USERNS_VISIBLE) { - if (!fs_fully_visible(type, &mnt_flags)) + if (!fs_fully_visible(type, &mnt_flags)) { + put_filesystem(type); return -EPERM; + } } } @@ -3271,7 +3273,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags) list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { struct inode *inode = child->mnt_mountpoint->d_inode; /* Only worry about locked mounts */ - if (!(mnt_flags & MNT_LOCKED)) + if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; /* Is the directory permanetly empty? 
*/ if (!is_empty_dir_inode(inode)) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 576e4639ca60..314b3caa701c 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -65,6 +65,7 @@ struct coredump_params { unsigned long limit; unsigned long mm_flags; loff_t written; + loff_t pos; }; /* diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 0c72204c75fc..fb39d5add173 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -25,7 +25,7 @@ #define CLK_SET_PARENT_GATE BIT(1) /* must be gated across re-parent */ #define CLK_SET_RATE_PARENT BIT(2) /* propagate rate change up one level */ #define CLK_IGNORE_UNUSED BIT(3) /* do not gate even if unused */ -#define CLK_IS_ROOT BIT(4) /* Deprecated: Don't use */ + /* unused */ #define CLK_IS_BASIC BIT(5) /* Basic clk, can't do a to_clk_foo() */ #define CLK_GET_RATE_NOCACHE BIT(6) /* do not use the cached clk rate */ #define CLK_SET_RATE_NO_REPARENT BIT(7) /* don't re-parent on rate change */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1a827cecd62f..7921f4f20a58 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -517,6 +517,11 @@ struct swevent_hlist { struct perf_cgroup; struct ring_buffer; +struct pmu_event_list { + raw_spinlock_t lock; + struct list_head list; +}; + /** * struct perf_event - performance event kernel representation: */ @@ -675,6 +680,7 @@ struct perf_event { int cgrp_defer_enabled; #endif + struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1074,7 +1080,7 @@ extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); -extern int get_callchain_buffers(void); +extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern int sysctl_perf_event_max_stack; @@ -1326,6 +1332,13 @@ struct perf_pmu_events_attr { const char *event_str; }; +struct perf_pmu_events_ht_attr { + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; +}; + ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 36ce552cf6a9..c66a485a24ac 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -276,6 +276,9 @@ enum perf_event_read_format { /* * Hardware event_id to monitor via a performance monitoring event: + * + * @sample_max_stack: Max number of frame pointers in a callchain, + * should be < /proc/sys/kernel/perf_event_max_stack */ struct perf_event_attr { @@ -385,7 +388,8 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; - __u32 __reserved_2; /* align to __u64 */ + __u16 sample_max_stack; + __u16 __reserved_2; /* align to __u64 */ }; #define perf_flags(attr) (*(&(attr)->read_format + 1)) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 080a2dfb5800..bf4495fcd25d 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -99,7 +99,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) if (err) goto free_smap; - err = get_callchain_buffers(); + err = get_callchain_buffers(sysctl_perf_event_max_stack); if (err) goto free_smap; diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 179ef4640964..e9fdb5203de5 100644 --- 
a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -104,7 +104,7 @@ fail: return -ENOMEM; } -int get_callchain_buffers(void) +int get_callchain_buffers(int event_max_stack) { int err = 0; int count; @@ -121,6 +121,15 @@ int get_callchain_buffers(void) /* If the allocation failed, give up */ if (!callchain_cpus_entries) err = -ENOMEM; + /* + * If requesting per event more than the global cap, + * return a different error to help userspace figure + * this out. + * + * And also do it here so that we have &callchain_mutex held. + */ + if (event_max_stack > sysctl_perf_event_max_stack) + err = -EOVERFLOW; goto exit; } @@ -174,11 +183,12 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) bool user = !event->attr.exclude_callchain_user; /* Disallow cross-task user callchains. */ bool crosstask = event->ctx->task && event->ctx->task != current; + const u32 max_stack = event->attr.sample_max_stack; if (!kernel && !user) return NULL; - return get_perf_callchain(regs, 0, kernel, user, sysctl_perf_event_max_stack, crosstask, true); + return get_perf_callchain(regs, 0, kernel, user, max_stack, crosstask, true); } struct perf_callchain_entry * diff --git a/kernel/events/core.c b/kernel/events/core.c index 274450efea90..05b923e2111a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -335,6 +335,7 @@ static atomic_t perf_sched_count; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static DEFINE_PER_CPU(int, perf_sched_cb_usages); +static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -396,6 +397,13 @@ int perf_proc_update_handler(struct ctl_table *table, int write, if (ret || !write) return ret; + /* + * If throttling is disabled don't allow the write: + */ + if (sysctl_perf_cpu_time_max_percent == 100 || + sysctl_perf_cpu_time_max_percent == 0) + return -EINVAL; + max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ); perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate; update_perf_cpu_limits(); @@ -3665,6 +3673,39 @@ static void free_event_rcu(struct rcu_head *head) static void ring_buffer_attach(struct perf_event *event, struct ring_buffer *rb); +static void detach_sb_event(struct perf_event *event) +{ + struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); + + raw_spin_lock(&pel->lock); + list_del_rcu(&event->sb_list); + raw_spin_unlock(&pel->lock); +} + +static bool is_sb_event(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + + if (event->parent) + return false; + + if (event->attach_state & PERF_ATTACH_TASK) + return false; + + if (attr->mmap || attr->mmap_data || attr->mmap2 || + attr->comm || attr->comm_exec || + attr->task || + attr->context_switch) + return true; + return false; +} + +static void unaccount_pmu_sb_event(struct perf_event *event) +{ + if (is_sb_event(event)) + detach_sb_event(event); +} + static void unaccount_event_cpu(struct perf_event *event, int cpu) { if (event->parent) @@ -3728,6 +3769,8 @@ static void unaccount_event(struct perf_event *event) } unaccount_event_cpu(event, event->cpu); + + unaccount_pmu_sb_event(event); } static void perf_sched_delayed(struct work_struct *work) @@ -5856,11 +5899,11 @@ perf_event_read_event(struct perf_event *event, perf_output_end(&handle); } -typedef void (perf_event_aux_output_cb)(struct perf_event *event, void *data); +typedef void (perf_iterate_f)(struct perf_event *event, void *data); static void 
-perf_event_aux_ctx(struct perf_event_context *ctx, - perf_event_aux_output_cb output, +perf_iterate_ctx(struct perf_event_context *ctx, + perf_iterate_f output, void *data, bool all) { struct perf_event *event; @@ -5877,52 +5920,55 @@ perf_event_aux_ctx(struct perf_event_context *ctx, } } -static void -perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data, - struct perf_event_context *task_ctx) +static void perf_iterate_sb_cpu(perf_iterate_f output, void *data) { - rcu_read_lock(); - preempt_disable(); - perf_event_aux_ctx(task_ctx, output, data, false); - preempt_enable(); - rcu_read_unlock(); + struct pmu_event_list *pel = this_cpu_ptr(&pmu_sb_events); + struct perf_event *event; + + list_for_each_entry_rcu(event, &pel->list, sb_list) { + if (event->state < PERF_EVENT_STATE_INACTIVE) + continue; + if (!event_filter_match(event)) + continue; + output(event, data); + } } +/* + * Iterate all events that need to receive side-band events. + * + * For new callers; ensure that account_pmu_sb_event() includes + * your event, otherwise it might not get delivered. + */ static void -perf_event_aux(perf_event_aux_output_cb output, void *data, +perf_iterate_sb(perf_iterate_f output, void *data, struct perf_event_context *task_ctx) { - struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - struct pmu *pmu; int ctxn; + rcu_read_lock(); + preempt_disable(); + /* - * If we have task_ctx != NULL we only notify - * the task context itself. The task_ctx is set - * only for EXIT events before releasing task + * If we have task_ctx != NULL we only notify the task context itself. + * The task_ctx is set only for EXIT events before releasing task * context. */ if (task_ctx) { - perf_event_aux_task_ctx(output, data, task_ctx); - return; + perf_iterate_ctx(task_ctx, output, data, false); + goto done; } - rcu_read_lock(); - list_for_each_entry_rcu(pmu, &pmus, entry) { - cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); - if (cpuctx->unique_pmu != pmu) - goto next; - perf_event_aux_ctx(&cpuctx->ctx, output, data, false); - ctxn = pmu->task_ctx_nr; - if (ctxn < 0) - goto next; + perf_iterate_sb_cpu(output, data); + + for_each_task_context_nr(ctxn) { ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); if (ctx) - perf_event_aux_ctx(ctx, output, data, false); -next: - put_cpu_ptr(pmu->pmu_cpu_context); + perf_iterate_ctx(ctx, output, data, false); } +done: + preempt_enable(); rcu_read_unlock(); } @@ -5971,7 +6017,7 @@ void perf_event_exec(void) perf_event_enable_on_exec(ctxn); - perf_event_aux_ctx(ctx, perf_event_addr_filters_exec, NULL, + perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); } rcu_read_unlock(); @@ -6015,9 +6061,9 @@ static int __perf_pmu_output_stop(void *info) }; rcu_read_lock(); - perf_event_aux_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false); + perf_iterate_ctx(&cpuctx->ctx, __perf_event_output_stop, &ro, false); if (cpuctx->task_ctx) - perf_event_aux_ctx(cpuctx->task_ctx, __perf_event_output_stop, + perf_iterate_ctx(cpuctx->task_ctx, __perf_event_output_stop, &ro, false); rcu_read_unlock(); @@ -6146,7 +6192,7 @@ static void perf_event_task(struct task_struct *task, }, }; - perf_event_aux(perf_event_task_output, + perf_iterate_sb(perf_event_task_output, &task_event, task_ctx); } @@ -6225,7 +6271,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; - perf_event_aux(perf_event_comm_output, + perf_iterate_sb(perf_event_comm_output, comm_event, NULL); } @@ -6456,7 
+6502,7 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; - perf_event_aux(perf_event_mmap_output, + perf_iterate_sb(perf_event_mmap_output, mmap_event, NULL); @@ -6539,7 +6585,7 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma) if (!ctx) continue; - perf_event_aux_ctx(ctx, __perf_addr_filters_adjust, vma, true); + perf_iterate_ctx(ctx, __perf_addr_filters_adjust, vma, true); } rcu_read_unlock(); } @@ -6726,7 +6772,7 @@ static void perf_event_switch(struct task_struct *task, }, }; - perf_event_aux(perf_event_switch_output, + perf_iterate_sb(perf_event_switch_output, &switch_event, NULL); } @@ -8648,6 +8694,28 @@ unlock: return pmu; } +static void attach_sb_event(struct perf_event *event) +{ + struct pmu_event_list *pel = per_cpu_ptr(&pmu_sb_events, event->cpu); + + raw_spin_lock(&pel->lock); + list_add_rcu(&event->sb_list, &pel->list); + raw_spin_unlock(&pel->lock); +} + +/* + * We keep a list of all !task (and therefore per-cpu) events + * that need to receive side-band records. + * + * This avoids having to scan all the various PMU per-cpu contexts + * looking for them. + */ +static void account_pmu_sb_event(struct perf_event *event) +{ + if (is_sb_event(event)) + attach_sb_event(event); +} + static void account_event_cpu(struct perf_event *event, int cpu) { if (event->parent) @@ -8728,6 +8796,8 @@ static void account_event(struct perf_event *event) enabled: account_event_cpu(event, event->cpu); + + account_pmu_sb_event(event); } /* @@ -8876,7 +8946,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (!event->parent) { if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { - err = get_callchain_buffers(); + err = get_callchain_buffers(attr->sample_max_stack); if (err) goto err_addr_filters; } @@ -9198,6 +9268,9 @@ SYSCALL_DEFINE5(perf_event_open, return -EINVAL; } + if (!attr.sample_max_stack) + attr.sample_max_stack = sysctl_perf_event_max_stack; + /* * In cgroup mode, the pid argument is used to pass the fd * opened to the cgroup directory in cgroupfs. 
The cpu argument @@ -9271,7 +9344,7 @@ SYSCALL_DEFINE5(perf_event_open, if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { - err = -ENOTSUPP; + err = -EOPNOTSUPP; goto err_alloc; } } @@ -10233,6 +10306,9 @@ static void __init perf_event_init_all_cpus(void) swhash = &per_cpu(swevent_htable, cpu); mutex_init(&swhash->hlist_mutex); INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu)); + + INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu)); + raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu)); } } diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 316f308a63ea..67ff93ec1515 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -10,6 +10,7 @@ endif CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar +LD = $(CROSS_COMPILE)ld MAKEFLAGS += --no-print-directory diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c index 0e636c4339b8..b0a035fc87b3 100644 --- a/tools/lib/api/fd/array.c +++ b/tools/lib/api/fd/array.c @@ -85,7 +85,8 @@ int fdarray__add(struct fdarray *fda, int fd, short revents) } int fdarray__filter(struct fdarray *fda, short revents, - void (*entry_destructor)(struct fdarray *fda, int fd)) + void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), + void *arg) { int fd, nr = 0; @@ -95,7 +96,7 @@ int fdarray__filter(struct fdarray *fda, short revents, for (fd = 0; fd < fda->nr; ++fd) { if (fda->entries[fd].revents & revents) { if (entry_destructor) - entry_destructor(fda, fd); + entry_destructor(fda, fd, arg); continue; } diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h index 45db01818f45..e87fd800fa8d 100644 --- a/tools/lib/api/fd/array.h +++ b/tools/lib/api/fd/array.h @@ -34,7 +34,8 @@ void fdarray__delete(struct fdarray *fda); int fdarray__add(struct fdarray *fda, int fd, short revents); int fdarray__poll(struct fdarray *fda, int timeout); int fdarray__filter(struct fdarray *fda, short revents, - void (*entry_destructor)(struct fdarray *fda, int fd)); + void (*entry_destructor)(struct fdarray *fda, int fd, void *arg), + void *arg); int fdarray__grow(struct fdarray *fda, int extra); int fdarray__fprintf(struct fdarray *fda, FILE *fp); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7e543c3102d4..462e526a4465 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1186,20 +1186,14 @@ bpf_object__next(struct bpf_object *prev) return next; } -const char * -bpf_object__get_name(struct bpf_object *obj) +const char *bpf_object__name(struct bpf_object *obj) { - if (!obj) - return ERR_PTR(-EINVAL); - return obj->path; + return obj ? obj->path : ERR_PTR(-EINVAL); } -unsigned int -bpf_object__get_kversion(struct bpf_object *obj) +unsigned int bpf_object__kversion(struct bpf_object *obj) { - if (!obj) - return 0; - return obj->kern_version; + return obj ? obj->kern_version : 0; } struct bpf_program * @@ -1224,9 +1218,8 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) return &obj->programs[idx]; } -int bpf_program__set_private(struct bpf_program *prog, - void *priv, - bpf_program_clear_priv_t clear_priv) +int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv) { if (prog->priv && prog->clear_priv) prog->clear_priv(prog, prog->priv); @@ -1236,10 +1229,9 @@ int bpf_program__set_private(struct bpf_program *prog, return 0; } -int bpf_program__get_private(struct bpf_program *prog, void **ppriv) +void *bpf_program__priv(struct bpf_program *prog) { - *ppriv = prog->priv; - return 0; + return prog ? 
prog->priv : ERR_PTR(-EINVAL); } const char *bpf_program__title(struct bpf_program *prog, bool needs_copy) @@ -1311,32 +1303,23 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n) return fd; } -int bpf_map__get_fd(struct bpf_map *map) +int bpf_map__fd(struct bpf_map *map) { - if (!map) - return -EINVAL; - - return map->fd; + return map ? map->fd : -EINVAL; } -int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef) +const struct bpf_map_def *bpf_map__def(struct bpf_map *map) { - if (!map || !pdef) - return -EINVAL; - - *pdef = map->def; - return 0; + return map ? &map->def : ERR_PTR(-EINVAL); } -const char *bpf_map__get_name(struct bpf_map *map) +const char *bpf_map__name(struct bpf_map *map) { - if (!map) - return NULL; - return map->name; + return map ? map->name : NULL; } -int bpf_map__set_private(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv) +int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv) { if (!map) return -EINVAL; @@ -1351,14 +1334,9 @@ int bpf_map__set_private(struct bpf_map *map, void *priv, return 0; } -int bpf_map__get_private(struct bpf_map *map, void **ppriv) +void *bpf_map__priv(struct bpf_map *map) { - if (!map) - return -EINVAL; - - if (ppriv) - *ppriv = map->priv; - return 0; + return map ? map->priv : ERR_PTR(-EINVAL); } struct bpf_map * @@ -1389,7 +1367,7 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) } struct bpf_map * -bpf_object__get_map_by_name(struct bpf_object *obj, const char *name) +bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) { struct bpf_map *pos; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index a51594c7b518..722f46b2d553 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -55,8 +55,8 @@ void bpf_object__close(struct bpf_object *object); /* Load/unload object into/from kernel */ int bpf_object__load(struct bpf_object *obj); int bpf_object__unload(struct bpf_object *obj); -const char *bpf_object__get_name(struct bpf_object *obj); -unsigned int bpf_object__get_kversion(struct bpf_object *obj); +const char *bpf_object__name(struct bpf_object *obj); +unsigned int bpf_object__kversion(struct bpf_object *obj); struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ @@ -78,11 +78,10 @@ struct bpf_program *bpf_program__next(struct bpf_program *prog, typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); -int bpf_program__set_private(struct bpf_program *prog, void *priv, - bpf_program_clear_priv_t clear_priv); +int bpf_program__set_priv(struct bpf_program *prog, void *priv, + bpf_program_clear_priv_t clear_priv); -int bpf_program__get_private(struct bpf_program *prog, - void **ppriv); +void *bpf_program__priv(struct bpf_program *prog); const char *bpf_program__title(struct bpf_program *prog, bool needs_copy); @@ -171,7 +170,7 @@ struct bpf_map_def { */ struct bpf_map; struct bpf_map * -bpf_object__get_map_by_name(struct bpf_object *obj, const char *name); +bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); struct bpf_map * bpf_map__next(struct bpf_map *map, struct bpf_object *obj); @@ -180,13 +179,13 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj); (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) -int bpf_map__get_fd(struct bpf_map *map); -int bpf_map__get_def(struct bpf_map *map, struct bpf_map_def *pdef); -const char *bpf_map__get_name(struct bpf_map *map); +int bpf_map__fd(struct bpf_map *map); +const 
struct bpf_map_def *bpf_map__def(struct bpf_map *map); +const char *bpf_map__name(struct bpf_map *map); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); -int bpf_map__set_private(struct bpf_map *map, void *priv, - bpf_map_clear_priv_t clear_priv); -int bpf_map__get_private(struct bpf_map *map, void **ppriv); +int bpf_map__set_priv(struct bpf_map *map, void *priv, + bpf_map_clear_priv_t clear_priv); +void *bpf_map__priv(struct bpf_map *map); #endif diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 3d1bb802dbf4..3db3db9278be 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -30,3 +30,4 @@ config.mak.autogen *.pyo .config-detected util/intel-pt-decoder/inat-tables.c +arch/*/include/generated/ diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04f23b404bbc..d96ccd4844df 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -204,6 +204,38 @@ Aggregate counts per physical processor for system-wide mode measurements. --no-aggr:: Do not aggregate counts across all monitored CPUs. +--topdown:: +Print top down level 1 metrics if supported by the CPU. This allows +determining bottlenecks in the CPU pipeline for CPU-bound workloads, +by breaking the cycles consumed down into frontend bound, backend bound, +bad speculation and retiring. + +Frontend bound means that the CPU cannot fetch and decode instructions fast +enough. Backend bound means that computation or memory access is the +bottleneck. Bad Speculation means that the CPU wasted cycles due to branch +mispredictions and similar issues. Retiring means that the CPU computed without +an apparent bottleneck. The bottleneck is only the real bottleneck +if the workload is actually bound by the CPU and not by something else. + +For best results it is usually a good idea to use it with interval +mode like -I 1000, as the bottleneck of workloads can change often. + +The top down metrics are collected per core instead of per +CPU thread. Per core mode is automatically enabled +and -a (global monitoring) is needed, requiring root rights or +perf.perf_event_paranoid=-1. + +Topdown uses the full Performance Monitoring Unit, and needs +disabling of the NMI watchdog (as root): +echo 0 > /proc/sys/kernel/nmi_watchdog +for best results. Otherwise the bottlenecks may be inconsistent +on workloads with changing phases. + +This enables --metric-only, unless overridden with --no-metric-only. + +To interpret the results it is usually necessary to know which +CPUs the workload runs on. If needed, the CPUs can be forced using +taskset. 
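For orientation, the four fractions that --topdown prints come from the five topdown-* events via the standard level-1 formulas of the top-down method. The sketch below is a reference calculation on made-up counts (assumed to already include the .scale factors shown earlier); it is not a copy of perf's builtin-stat.c, which may clamp and round differently:

#include <stdio.h>

/* Level-1 top-down breakdown from scaled topdown-* event counts. */
static void topdown_level1(double total_slots, double slots_issued,
			   double slots_retired, double fetch_bubbles,
			   double recovery_bubbles)
{
	double retiring = slots_retired / total_slots;
	double frontend = fetch_bubbles / total_slots;
	double bad_spec = (slots_issued - slots_retired +
			   recovery_bubbles) / total_slots;
	/* Whatever is left over is stalled somewhere in the backend. */
	double backend  = 1.0 - retiring - frontend - bad_spec;

	printf("retiring %4.1f%%  frontend %4.1f%%  bad spec %4.1f%%  backend %4.1f%%\n",
	       100 * retiring, 100 * frontend,
	       100 * bad_spec, 100 * backend);
}

int main(void)
{
	/* Illustrative numbers only: 4000 issue slots in the window. */
	topdown_level1(4000, 1500, 1200, 1000, 120);
	return 0;
}

A matching invocation, per the documentation above, would be something like perf stat --topdown -a -I 1000, run as root with the NMI watchdog disabled.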
EXAMPLES -------- diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index d22e3d07de3d..f98da17357c0 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,4 +1,4 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index e58123a8912b..02f41dba4f4f 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,2 +1,2 @@ libperf-$(CONFIG_DWARF) += dwarf-regs.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index a87afa91a99e..c116b713f7f7 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -1,11 +1,13 @@ +#ifndef REMOTE_UNWIND_LIBUNWIND #include <errno.h> #include <libunwind.h> #include "perf_regs.h" #include "../../util/unwind.h" #include "../../util/debug.h" +#endif -int libunwind__arch_reg_id(int regnum) +int LIBUNWIND__ARCH_REG_ID(int regnum) { switch (regnum) { case UNW_AARCH64_X0: diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index e83c8ce24303..fa090a9eaa38 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -102,7 +102,7 @@ static int lookup_triplets(const char *const *triplets, const char *name) * Return architecture name in a normalized form. * The conversion logic comes from the Makefile. */ -static const char *normalize_arch(char *arch) +const char *normalize_arch(char *arch) { if (!strcmp(arch, "x86_64")) return "x86"; diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index 7529cfb143ce..6b01c736b7d9 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -6,5 +6,6 @@ extern const char *objdump_path; int perf_env__lookup_objdump(struct perf_env *env); +const char *normalize_arch(char *arch); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 465970370f3e..f95e6f46ef0d 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -3,11 +3,12 @@ libperf-y += tsc.o libperf-y += pmu.o libperf-y += kvm-stat.o libperf-y += perf_regs.o +libperf-y += group.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o -libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c new file mode 100644 index 000000000000..37f92aa39a5d --- /dev/null +++ b/tools/perf/arch/x86/util/group.c @@ -0,0 +1,27 @@ +#include <stdio.h> +#include "api/fs/fs.h" +#include "util/group.h" + +/* + * Check whether we can use a group for top down. + * Without a group may get bad results due to multiplexing. + */ +bool arch_topdown_check_group(bool *warn) +{ + int n; + + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) + return false; + if (n > 0) { + *warn = true; + return false; + } + return true; +} + +void arch_topdown_group_warn(void) +{ + fprintf(stderr, + "nmi_watchdog enabled with topdown. 
May give wrong results.\n" + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); +} diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 357f1b13b5ae..2e5567c94e09 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -62,6 +62,8 @@ int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tsc_conversion tc; int err; + if (!pc) + return 0; err = perf_read_tsc_conversion(pc, &tc); if (err == -EOPNOTSUPP) return 0; diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index db25e93d989c..4f16661cbdbb 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,12 +1,14 @@ +#ifndef REMOTE_UNWIND_LIBUNWIND #include <errno.h> #include <libunwind.h> #include "perf_regs.h" #include "../../util/unwind.h" #include "../../util/debug.h" +#endif #ifdef HAVE_ARCH_X86_64_SUPPORT -int libunwind__arch_reg_id(int regnum) +int LIBUNWIND__ARCH_REG_ID(int regnum) { int id; @@ -70,7 +72,7 @@ int libunwind__arch_reg_id(int regnum) return id; } #else -int libunwind__arch_reg_id(int regnum) +int LIBUNWIND__ARCH_REG_ID(int regnum) { int id; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index dc3fcb597e4c..d4cf1b0c88f9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -655,6 +655,13 @@ perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused return 0; } +static const struct perf_event_mmap_page *record__pick_pc(struct record *rec) +{ + if (rec->evlist && rec->evlist->mmap && rec->evlist->mmap[0].base) + return rec->evlist->mmap[0].base; + return NULL; +} + static int record__synthesize(struct record *rec) { struct perf_session *session = rec->session; @@ -692,7 +699,7 @@ static int record__synthesize(struct record *rec) } } - err = perf_event__synth_time_conv(rec->evlist->mmap[0].base, tool, + err = perf_event__synth_time_conv(record__pick_pc(rec), tool, process_synthesized_event, machine); if (err) goto out; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e3ce2f34d3ad..46011235af5d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -339,7 +339,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr) */ static int perf_session__check_output_opt(struct perf_session *session) { - int j; + unsigned int j; struct perf_evsel *evsel; for (j = 0; j < PERF_TYPE_MAX; ++j) { @@ -388,17 +388,20 @@ static int perf_session__check_output_opt(struct perf_session *session) struct perf_event_attr *attr; j = PERF_TYPE_TRACEPOINT; - evsel = perf_session__find_first_evtype(session, j); - if (evsel == NULL) - goto out; - attr = &evsel->attr; + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.type != j) + continue; + + attr = &evsel->attr; - if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { - output[j].fields |= PERF_OUTPUT_IP; - output[j].fields |= PERF_OUTPUT_SYM; - output[j].fields |= PERF_OUTPUT_DSO; - set_print_ip_opts(attr); + if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { + output[j].fields |= PERF_OUTPUT_IP; + output[j].fields |= PERF_OUTPUT_SYM; + output[j].fields |= PERF_OUTPUT_DSO; + set_print_ip_opts(attr); + goto out; + } } } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee7ada78d86f..dff63733dfb7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,10 +59,13 @@ #include "util/thread.h" #include "util/thread_map.h" #include 
"util/counts.h" +#include "util/group.h" #include "util/session.h" #include "util/tool.h" +#include "util/group.h" #include "asm/bug.h" +#include <api/fs/fs.h> #include <stdlib.h> #include <sys/prctl.h> #include <locale.h> @@ -98,6 +101,15 @@ static const char * transaction_limited_attrs = { "}" }; +static const char * topdown_attrs[] = { + "topdown-total-slots", + "topdown-slots-retired", + "topdown-recovery-bubbles", + "topdown-fetch-bubbles", + "topdown-slots-issued", + NULL, +}; + static struct perf_evlist *evsel_list; static struct target target = { @@ -112,6 +124,7 @@ static volatile pid_t child_pid = -1; static bool null_run = false; static int detailed_run = 0; static bool transaction_run; +static bool topdown_run = false; static bool big_num = true; static int big_num_opt = -1; static const char *csv_sep = NULL; @@ -124,6 +137,7 @@ static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; static bool metric_only = false; +static bool force_metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; @@ -1302,7 +1316,15 @@ static int aggr_header_lens[] = { [AGGR_GLOBAL] = 0, }; -static void print_metric_headers(char *prefix) +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_SOCKET] = "socket,cpus", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_GLOBAL] = "" +}; + +static void print_metric_headers(const char *prefix, bool no_indent) { struct perf_stat_output_ctx out; struct perf_evsel *counter; @@ -1313,9 +1335,15 @@ static void print_metric_headers(char *prefix) if (prefix) fprintf(stat_config.output, "%s", prefix); - if (!csv_output) + if (!csv_output && !no_indent) fprintf(stat_config.output, "%*s", aggr_header_lens[stat_config.aggr_mode], ""); + if (csv_output) { + if (stat_config.interval) + fputs("time,", stat_config.output); + fputs(aggr_header_csv[stat_config.aggr_mode], + stat_config.output); + } /* Print metrics headers only */ evlist__for_each(evsel_list, counter) { @@ -1338,28 +1366,40 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output && !metric_only) { + if (num_print_interval == 0 && !csv_output) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time socket cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_CORE: - fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time core cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time CPU"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_THREAD: - fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time comm-pid"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_GLOBAL: default: - fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + fprintf(output, "# time"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); case AGGR_UNSET: break; } } + if (num_print_interval == 0 && 
metric_only) + print_metric_headers(" ", true); if (++num_print_interval == 25) num_print_interval = 0; } @@ -1428,8 +1468,8 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) { static int num_print_iv; - if (num_print_iv == 0) - print_metric_headers(prefix); + if (num_print_iv == 0 && !interval) + print_metric_headers(prefix, false); if (num_print_iv++ == 25) num_print_iv = 0; if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) @@ -1520,6 +1560,14 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, return 0; } +static int enable_metric_only(const struct option *opt __maybe_unused, + const char *s __maybe_unused, int unset) +{ + force_metric_only = true; + metric_only = !unset; + return 0; +} + static const struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1578,8 +1626,10 @@ static const struct option stat_options[] = { "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), - OPT_BOOLEAN(0, "metric-only", &metric_only, - "Only print computed metrics. No raw values"), + OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, + "Only print computed metrics. No raw values", enable_metric_only), + OPT_BOOLEAN(0, "topdown", &topdown_run, + "measure topdown level 1 statistics"), OPT_END() }; @@ -1772,12 +1822,62 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) return 0; } +static int topdown_filter_events(const char **attr, char **str, bool use_group) +{ + int off = 0; + int i; + int len = 0; + char *s; + + for (i = 0; attr[i]; i++) { + if (pmu_have_event("cpu", attr[i])) { + len += strlen(attr[i]) + 1; + attr[i - off] = attr[i]; + } else + off++; + } + attr[i - off] = NULL; + + *str = malloc(len + 1 + 2); + if (!*str) + return -1; + s = *str; + if (i - off == 0) { + *s = 0; + return 0; + } + if (use_group) + *s++ = '{'; + for (i = 0; attr[i]; i++) { + strcpy(s, attr[i]); + s += strlen(s); + *s++ = ','; + } + if (use_group) { + s[-1] = '}'; + *s = 0; + } else + s[-1] = 0; + return 0; +} + +__weak bool arch_topdown_check_group(bool *warn) +{ + *warn = false; + return false; +} + +__weak void arch_topdown_group_warn(void) +{ +} + /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ static int add_default_attributes(void) { + int err; struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, @@ -1896,7 +1996,6 @@ static int add_default_attributes(void) return 0; if (transaction_run) { - int err; if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, NULL); @@ -1909,6 +2008,46 @@ static int add_default_attributes(void) return 0; } + if (topdown_run) { + char *str = NULL; + bool warn = false; + + if (stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_CORE) { + pr_err("top down event configuration requires --per-core mode\n"); + return -1; + } + stat_config.aggr_mode = AGGR_CORE; + if (nr_cgroups || !target__has_cpu(&target)) { + pr_err("top down event configuration requires system-wide mode (-a)\n"); + return -1; + } + + if (!force_metric_only) + metric_only = true; + if (topdown_filter_events(topdown_attrs, &str, + arch_topdown_check_group(&warn)) < 0) { + pr_err("Out of memory\n"); + return -1; + } + if (topdown_attrs[0] && str) { + if 
(warn) + arch_topdown_group_warn(); + err = parse_events(evsel_list, str, NULL); + if (err) { + fprintf(stderr, + "Cannot set up top down events %s: %d\n", + str, err); + free(str); + return -1; + } + } else { + fprintf(stderr, "System does not support topdown\n"); + return -1; + } + free(str); + } + if (!evsel_list->nr_entries) { if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5ad0255f8756..098874b99981 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -73,17 +73,25 @@ endif # # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ # + +libunwind_arch_set_flags = $(eval $(libunwind_arch_set_flags_code)) +define libunwind_arch_set_flags_code + FEATURE_CHECK_CFLAGS-libunwind-$(1) = -I$(LIBUNWIND_DIR)/include + FEATURE_CHECK_LDFLAGS-libunwind-$(1) = -L$(LIBUNWIND_DIR)/lib +endef + ifdef LIBUNWIND_DIR LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib + LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64 + $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch))) endif -LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) # Set per-feature check compilation flags FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) -FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) -FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT @@ -351,10 +359,40 @@ ifeq ($(ARCH),powerpc) endif ifndef NO_LIBUNWIND + have_libunwind := + + ifeq ($(feature-libunwind-x86), 1) + $(call detected,CONFIG_LIBUNWIND_X86) + CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT + LDFLAGS += -lunwind-x86 + have_libunwind = 1 + endif + + ifeq ($(feature-libunwind-aarch64), 1) + $(call detected,CONFIG_LIBUNWIND_AARCH64) + CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT + LDFLAGS += -lunwind-aarch64 + have_libunwind = 1 + $(call feature_check,libunwind-debug-frame-aarch64) + ifneq ($(feature-libunwind-debug-frame-aarch64), 1) + msg := $(warning No debug_frame support found in libunwind-aarch64); + CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64 + endif + endif + ifneq ($(feature-libunwind), 1) msg := $(warning No libunwind found. 
Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR); + NO_LOCAL_LIBUNWIND := 1 + else + have_libunwind := 1 + $(call detected,CONFIG_LOCAL_LIBUNWIND) + endif + + ifneq ($(have_libunwind), 1) NO_LIBUNWIND := 1 endif +else + NO_LOCAL_LIBUNWIND := 1 endif ifndef NO_LIBBPF @@ -392,7 +430,7 @@ else NO_DWARF_UNWIND := 1 endif -ifndef NO_LIBUNWIND +ifndef NO_LOCAL_LIBUNWIND ifeq ($(ARCH),$(filter $(ARCH),arm arm64)) $(call feature_check,libunwind-debug-frame) ifneq ($(feature-libunwind-debug-frame), 1) @@ -403,8 +441,12 @@ ifndef NO_LIBUNWIND # non-ARM has no dwarf_find_debug_frame() function: CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME endif - CFLAGS += -DHAVE_LIBUNWIND_SUPPORT EXTLIBS += $(LIBUNWIND_LIBS) + LDFLAGS += $(LIBUNWIND_LIBS) +endif + +ifndef NO_LIBUNWIND + CFLAGS += -DHAVE_LIBUNWIND_SUPPORT CFLAGS += $(LIBUNWIND_CFLAGS) LDFLAGS += $(LIBUNWIND_LDFLAGS) endif diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index c809463edbe5..59dbd0550c51 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -36,7 +36,7 @@ int test__fdarray__filter(int subtest __maybe_unused) } fdarray__init_revents(fda, POLLIN); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); if (nr_fds != fda->nr_alloc) { pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything", nr_fds, fda->nr_alloc); @@ -44,7 +44,7 @@ int test__fdarray__filter(int subtest __maybe_unused) } fdarray__init_revents(fda, POLLHUP); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); if (nr_fds != 0) { pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds", nr_fds, fda->nr_alloc); @@ -57,7 +57,7 @@ int test__fdarray__filter(int subtest __maybe_unused) pr_debug("\nfiltering all but fda->entries[2]:"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 1) { pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds); @@ -78,7 +78,7 @@ int test__fdarray__filter(int subtest __maybe_unused) pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):"); fdarray__fprintf_prefix(fda, "before", stderr); - nr_fds = fdarray__filter(fda, POLLHUP, NULL); + nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL); fdarray__fprintf_prefix(fda, " after", stderr); if (nr_fds != 2) { pr_debug("\nfdarray__filter()=%d != 2, should have left just two events", diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7865f68dc0d8..b2a2c74136a5 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1783,8 +1783,8 @@ static int test_pmu_events(void) struct evlist_test e; char name[MAX_NAME]; - if (!strcmp(ent->d_name, ".") || - !strcmp(ent->d_name, "..")) + /* Names containing . 
are special and cannot be used directly */ + if (strchr(ent->d_name, '.')) continue; snprintf(name, MAX_NAME, "cpu/event=%s/u", ent->d_name); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8c6c8a0ca642..fced8336e5fd 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -99,7 +99,10 @@ libperf-$(CONFIG_DWARF) += probe-finder.o libperf-$(CONFIG_DWARF) += dwarf-aux.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o +libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 493307d1414c..dcc8845881ae 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -339,7 +339,7 @@ config_bpf_program(struct bpf_program *prog) } pr_debug("bpf: config '%s' is ok\n", config_str); - err = bpf_program__set_private(prog, priv, clear_prog_priv); + err = bpf_program__set_priv(prog, priv, clear_prog_priv); if (err) { pr_debug("Failed to set priv for program '%s'\n", config_str); goto errout; @@ -380,15 +380,14 @@ preproc_gen_prologue(struct bpf_program *prog, int n, struct bpf_insn *orig_insns, int orig_insns_cnt, struct bpf_prog_prep_result *res) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); struct probe_trace_event *tev; struct perf_probe_event *pev; - struct bpf_prog_priv *priv; struct bpf_insn *buf; size_t prologue_cnt = 0; int i, err; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) + if (IS_ERR(priv) || !priv) goto errout; pev = &priv->pev; @@ -535,13 +534,12 @@ static int map_prologue(struct perf_probe_event *pev, int *mapping, static int hook_load_preprocessor(struct bpf_program *prog) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); struct perf_probe_event *pev; - struct bpf_prog_priv *priv; bool need_prologue = false; int err, i; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) { + if (IS_ERR(priv) || !priv) { pr_debug("Internal error when hook preprocessor\n"); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -607,9 +605,11 @@ int bpf__probe(struct bpf_object *obj) if (err) goto out; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) + priv = bpf_program__priv(prog); + if (IS_ERR(priv) || !priv) { + err = PTR_ERR(priv); goto out; + } pev = &priv->pev; err = convert_perf_probe_events(pev, 1); @@ -645,13 +645,12 @@ int bpf__unprobe(struct bpf_object *obj) { int err, ret = 0; struct bpf_program *prog; - struct bpf_prog_priv *priv; bpf_object__for_each_program(prog, obj) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); int i; - err = bpf_program__get_private(prog, (void **)&priv); - if (err || !priv) + if (IS_ERR(priv) || !priv) continue; for (i = 0; i < priv->pev.ntevs; i++) { @@ -702,14 +701,12 @@ int bpf__foreach_tev(struct bpf_object *obj, int err; bpf_object__for_each_program(prog, obj) { + struct bpf_prog_priv *priv = bpf_program__priv(prog); struct probe_trace_event *tev; struct perf_probe_event *pev; - struct bpf_prog_priv *priv; int i, fd; - err = bpf_program__get_private(prog, - (void **)&priv); - if (err || !priv) { + if (IS_ERR(priv) || !priv) { pr_debug("bpf: failed to get private field\n"); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -897,15 +894,12 @@ bpf_map_priv__clone(struct bpf_map_priv *priv) static int bpf_map__add_op(struct bpf_map 
*map, struct bpf_map_op *op) { - struct bpf_map_priv *priv; - const char *map_name; - int err; + const char *map_name = bpf_map__name(map); + struct bpf_map_priv *priv = bpf_map__priv(map); - map_name = bpf_map__get_name(map); - err = bpf_map__get_private(map, (void **)&priv); - if (err) { + if (IS_ERR(priv)) { pr_debug("Failed to get private from map %s\n", map_name); - return err; + return PTR_ERR(priv); } if (!priv) { @@ -916,7 +910,7 @@ bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) } INIT_LIST_HEAD(&priv->ops_list); - if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) { + if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) { free(priv); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -948,30 +942,26 @@ static int __bpf_map__config_value(struct bpf_map *map, struct parse_events_term *term) { - struct bpf_map_def def; struct bpf_map_op *op; - const char *map_name; - int err; + const char *map_name = bpf_map__name(map); + const struct bpf_map_def *def = bpf_map__def(map); - map_name = bpf_map__get_name(map); - - err = bpf_map__get_def(map, &def); - if (err) { + if (IS_ERR(def)) { pr_debug("Unable to get map definition from '%s'\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; } - if (def.type != BPF_MAP_TYPE_ARRAY) { + if (def->type != BPF_MAP_TYPE_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; } - if (def.key_size < sizeof(unsigned int)) { + if (def->key_size < sizeof(unsigned int)) { pr_debug("Map %s has incorrect key size\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; } - switch (def.value_size) { + switch (def->value_size) { case 1: case 2: case 4: @@ -1014,12 +1004,10 @@ __bpf_map__config_event(struct bpf_map *map, struct perf_evlist *evlist) { struct perf_evsel *evsel; - struct bpf_map_def def; + const struct bpf_map_def *def; struct bpf_map_op *op; - const char *map_name; - int err; + const char *map_name = bpf_map__name(map); - map_name = bpf_map__get_name(map); evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str); if (!evsel) { pr_debug("Event (for '%s') '%s' doesn't exist\n", @@ -1027,18 +1015,18 @@ __bpf_map__config_event(struct bpf_map *map, return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; } - err = bpf_map__get_def(map, &def); - if (err) { + def = bpf_map__def(map); + if (IS_ERR(def)) { pr_debug("Unable to get map definition from '%s'\n", map_name); - return err; + return PTR_ERR(def); } /* * No need to check key_size and value_size: * kernel has already checked them. 
*/ - if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", map_name); return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; @@ -1087,9 +1075,8 @@ config_map_indices_range_check(struct parse_events_term *term, const char *map_name) { struct parse_events_array *array = &term->array; - struct bpf_map_def def; + const struct bpf_map_def *def; unsigned int i; - int err; if (!array->nr_ranges) return 0; @@ -1099,8 +1086,8 @@ config_map_indices_range_check(struct parse_events_term *term, return -BPF_LOADER_ERRNO__INTERNAL; } - err = bpf_map__get_def(map, &def); - if (err) { + def = bpf_map__def(map); + if (IS_ERR(def)) { pr_debug("ERROR: Unable to get map definition from '%s'\n", map_name); return -BPF_LOADER_ERRNO__INTERNAL; @@ -1111,7 +1098,7 @@ config_map_indices_range_check(struct parse_events_term *term, size_t length = array->ranges[i].length; unsigned int idx = start + length - 1; - if (idx >= def.max_entries) { + if (idx >= def->max_entries) { pr_debug("ERROR: index %d too large\n", idx); return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; } @@ -1147,7 +1134,7 @@ bpf__obj_config_map(struct bpf_object *obj, goto out; } - map = bpf_object__get_map_by_name(obj, map_name); + map = bpf_object__find_map_by_name(obj, map_name); if (!map) { pr_debug("ERROR: Map %s doesn't exist\n", map_name); err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST; @@ -1204,14 +1191,14 @@ out: } typedef int (*map_config_func_t)(const char *name, int map_fd, - struct bpf_map_def *pdef, + const struct bpf_map_def *pdef, struct bpf_map_op *op, void *pkey, void *arg); static int foreach_key_array_all(map_config_func_t func, void *arg, const char *name, - int map_fd, struct bpf_map_def *pdef, + int map_fd, const struct bpf_map_def *pdef, struct bpf_map_op *op) { unsigned int i; @@ -1231,7 +1218,7 @@ foreach_key_array_all(map_config_func_t func, static int foreach_key_array_ranges(map_config_func_t func, void *arg, const char *name, int map_fd, - struct bpf_map_def *pdef, + const struct bpf_map_def *pdef, struct bpf_map_op *op) { unsigned int i, j; @@ -1261,15 +1248,12 @@ bpf_map_config_foreach_key(struct bpf_map *map, void *arg) { int err, map_fd; - const char *name; struct bpf_map_op *op; - struct bpf_map_def def; - struct bpf_map_priv *priv; + const struct bpf_map_def *def; + const char *name = bpf_map__name(map); + struct bpf_map_priv *priv = bpf_map__priv(map); - name = bpf_map__get_name(map); - - err = bpf_map__get_private(map, (void **)&priv); - if (err) { + if (IS_ERR(priv)) { pr_debug("ERROR: failed to get private from map %s\n", name); return -BPF_LOADER_ERRNO__INTERNAL; } @@ -1278,29 +1262,29 @@ bpf_map_config_foreach_key(struct bpf_map *map, return 0; } - err = bpf_map__get_def(map, &def); - if (err) { + def = bpf_map__def(map); + if (IS_ERR(def)) { pr_debug("ERROR: failed to get definition from map %s\n", name); return -BPF_LOADER_ERRNO__INTERNAL; } - map_fd = bpf_map__get_fd(map); + map_fd = bpf_map__fd(map); if (map_fd < 0) { pr_debug("ERROR: failed to get fd from map %s\n", name); return map_fd; } list_for_each_entry(op, &priv->ops_list, list) { - switch (def.type) { + switch (def->type) { case BPF_MAP_TYPE_ARRAY: case BPF_MAP_TYPE_PERF_EVENT_ARRAY: switch (op->key_type) { case BPF_MAP_KEY_ALL: err = foreach_key_array_all(func, arg, name, - map_fd, &def, op); + map_fd, def, op); break; case BPF_MAP_KEY_RANGES: err = foreach_key_array_ranges(func, arg, name, - map_fd, &def, + map_fd, def, op); break; default: @@ -1410,7 
+1394,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, static int apply_obj_config_map_for_key(const char *name, int map_fd, - struct bpf_map_def *pdef __maybe_unused, + const struct bpf_map_def *pdef, struct bpf_map_op *op, void *pkey, void *arg __maybe_unused) { @@ -1475,9 +1459,9 @@ int bpf__apply_obj_config(void) #define bpf__for_each_stdout_map(pos, obj, objtmp) \ bpf__for_each_map(pos, obj, objtmp) \ - if (bpf_map__get_name(pos) && \ + if (bpf_map__name(pos) && \ (strcmp("__bpf_stdout__", \ - bpf_map__get_name(pos)) == 0)) + bpf_map__name(pos)) == 0)) int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) { @@ -1489,10 +1473,9 @@ int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) bool need_init = false; bpf__for_each_stdout_map(map, obj, tmp) { - struct bpf_map_priv *priv; + struct bpf_map_priv *priv = bpf_map__priv(map); - err = bpf_map__get_private(map, (void **)&priv); - if (err) + if (IS_ERR(priv)) return -BPF_LOADER_ERRNO__INTERNAL; /* @@ -1520,10 +1503,9 @@ int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) } bpf__for_each_stdout_map(map, obj, tmp) { - struct bpf_map_priv *priv; + struct bpf_map_priv *priv = bpf_map__priv(map); - err = bpf_map__get_private(map, (void **)&priv); - if (err) + if (IS_ERR(priv)) return -BPF_LOADER_ERRNO__INTERNAL; if (priv) continue; @@ -1533,7 +1515,7 @@ int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) if (!priv) return -ENOMEM; - err = bpf_map__set_private(map, priv, bpf_map_priv__clear); + err = bpf_map__set_priv(map, priv, bpf_map_priv__clear); if (err) { bpf_map_priv__clear(map, priv); return err; @@ -1677,7 +1659,7 @@ int bpf__strerror_load(struct bpf_object *obj, { bpf__strerror_head(err, buf, size); case LIBBPF_ERRNO__KVER: { - unsigned int obj_kver = bpf_object__get_kversion(obj); + unsigned int obj_kver = bpf_object__kversion(obj); unsigned int real_kver; if (fetch_kernel_version(&real_kver, NULL, 0)) { diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 67e5966503b2..20aef90bf194 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -144,7 +144,29 @@ static int asnprintf(char **strp, size_t size, const char *fmt, ...) return ret; } -static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, + size_t size) +{ + bool retry_old = true; + + snprintf(bf, size, "%s/%s/%s/kallsyms", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); +retry: + if (!access(bf, F_OK)) + return bf; + if (retry_old) { + /* Try old style kallsyms cache */ + snprintf(bf, size, "%s/%s/%s", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); + retry_old = false; + goto retry; + } + + return NULL; +} + +static char *build_id_cache__linkname(const char *sbuild_id, char *bf, + size_t size) { char *tmp = bf; int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, @@ -154,23 +176,52 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) return bf; } +static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso) +{ + return is_kallsyms ? "kallsyms" : (is_vdso ? 
"vdso" : "elf"); +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { - char build_id_hex[SBUILD_ID_SIZE]; + bool is_kallsyms = dso__is_kallsyms((struct dso *)dso); + bool is_vdso = dso__is_vdso((struct dso *)dso); + char sbuild_id[SBUILD_ID_SIZE]; + char *linkname; + bool alloc = (bf == NULL); + int ret; if (!dso->has_build_id) return NULL; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - return build_id__filename(build_id_hex, bf, size); + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + linkname = build_id_cache__linkname(sbuild_id, NULL, 0); + if (!linkname) + return NULL; + + /* Check if old style build_id cache */ + if (is_regular_file(linkname)) + ret = asnprintf(&bf, size, "%s", linkname); + else + ret = asnprintf(&bf, size, "%s/%s", linkname, + build_id_cache__basename(is_kallsyms, is_vdso)); + if (ret < 0 || (!alloc && size < (unsigned int)ret)) + bf = NULL; + free(linkname); + + return bf; } bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) { - char *id_name, *ch; + char *id_name = NULL, *ch; struct stat sb; + char sbuild_id[SBUILD_ID_SIZE]; + + if (!dso->has_build_id) + goto err; - id_name = dso__build_id_filename(dso, bf, size); + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + id_name = build_id_cache__linkname(sbuild_id, NULL, 0); if (!id_name) goto err; if (access(id_name, F_OK)) @@ -194,18 +245,14 @@ bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) if (ch - 3 < bf) goto err; + free(id_name); return strncmp(".ko", ch - 3, 3) == 0; err: - /* - * If dso__build_id_filename work, get id_name again, - * because id_name points to bf and is broken. - */ - if (id_name) - id_name = dso__build_id_filename(dso, bf, size); pr_err("Invalid build id: %s\n", id_name ? : dso->long_name ? : dso->short_name ? : "[unknown]"); + free(id_name); return false; } @@ -341,7 +388,8 @@ void disable_buildid_cache(void) } static char *build_id_cache__dirname_from_path(const char *name, - bool is_kallsyms, bool is_vdso) + bool is_kallsyms, bool is_vdso, + const char *sbuild_id) { char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; @@ -352,8 +400,9 @@ static char *build_id_cache__dirname_from_path(const char *name, return NULL; } - if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", - is_vdso ? DSO__NAME_VDSO : realname) < 0) + if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? DSO__NAME_VDSO : realname, + sbuild_id ? 
"/" : "", sbuild_id ?: "") < 0) filename = NULL; if (!slash) @@ -368,7 +417,8 @@ int build_id_cache__list_build_ids(const char *pathname, char *dir_name; int ret = 0; - dir_name = build_id_cache__dirname_from_path(pathname, false, false); + dir_name = build_id_cache__dirname_from_path(pathname, false, false, + NULL); if (!dir_name) return -ENOMEM; @@ -385,7 +435,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, { const size_t size = PATH_MAX; char *realname = NULL, *filename = NULL, *dir_name = NULL, - *linkname = zalloc(size), *targetname, *tmp; + *linkname = zalloc(size), *tmp; int err = -1; if (!is_kallsyms) { @@ -394,14 +444,22 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } - dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, + is_vdso, sbuild_id); if (!dir_name) goto out_free; + /* Remove old style build-id cache */ + if (is_regular_file(dir_name)) + if (unlink(dir_name)) + goto out_free; + if (mkdir_p(dir_name, 0755)) goto out_free; - if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + /* Save the allocated buildid dirname */ + if (asprintf(&filename, "%s/%s", dir_name, + build_id_cache__basename(is_kallsyms, is_vdso)) < 0) { filename = NULL; goto out_free; } @@ -415,7 +473,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; } - if (!build_id__filename(sbuild_id, linkname, size)) + if (!build_id_cache__linkname(sbuild_id, linkname, size)) goto out_free; tmp = strrchr(linkname, '/'); *tmp = '\0'; @@ -424,10 +482,10 @@ int build_id_cache__add_s(const char *sbuild_id, const char *name, goto out_free; *tmp = '/'; - targetname = filename + strlen(buildid_dir) - 5; - memcpy(targetname, "../..", 5); + tmp = dir_name + strlen(buildid_dir) - 5; + memcpy(tmp, "../..", 5); - if (symlink(targetname, linkname) == 0) + if (symlink(tmp, linkname) == 0) err = 0; out_free: if (!is_kallsyms) @@ -452,7 +510,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, bool build_id_cache__cached(const char *sbuild_id) { bool ret = false; - char *filename = build_id__filename(sbuild_id, NULL, 0); + char *filename = build_id_cache__linkname(sbuild_id, NULL, 0); if (filename && !access(filename, F_OK)) ret = true; @@ -471,7 +529,7 @@ int build_id_cache__remove_s(const char *sbuild_id) if (filename == NULL || linkname == NULL) goto out_free; - if (!build_id__filename(sbuild_id, linkname, size)) + if (!build_id_cache__linkname(sbuild_id, linkname, size)) goto out_free; if (access(linkname, F_OK)) @@ -489,7 +547,7 @@ int build_id_cache__remove_s(const char *sbuild_id) tmp = strrchr(linkname, '/') + 1; snprintf(tmp, size - (tmp - linkname), "%s", filename); - if (unlink(linkname)) + if (rm_rf(linkname)) goto out_free; err = 0; @@ -501,7 +559,7 @@ out_free: static int dso__cache_build_id(struct dso *dso, struct machine *machine) { - bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; + bool is_kallsyms = dso__is_kallsyms(dso); bool is_vdso = dso__is_vdso(dso); const char *name = dso->long_name; char nm[PATH_MAX]; diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 64af3e20610d..e5435f46e48e 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -14,6 +14,8 @@ struct dso; int build_id__sprintf(const u8 *build_id, int len, char *bf); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char 
*pathname, char *sbuild_id); +char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, + size_t size); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 65e2a4f7cb4e..a70f6b54eb92 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -94,6 +94,7 @@ struct callchain_param { enum perf_call_graph_mode record_mode; u32 dump_size; enum chain_mode mode; + u16 max_stack; u32 print_limit; double min_percent; sort_chain_func_t sort; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index dad7d8272168..8749eca3055f 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -275,7 +275,8 @@ static int perf_parse_file(config_fn_t fn, void *data) break; } } - die("bad config file line %d in %s", config_linenr, config_file_name); + pr_err("bad config file line %d in %s\n", config_linenr, config_file_name); + return -1; } static int parse_unit_factor(const char *end, unsigned long *val) @@ -479,16 +480,15 @@ static int perf_config_global(void) int perf_config(config_fn_t fn, void *data) { - int ret = 0, found = 0; + int ret = -1; const char *home = NULL; /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ if (config_exclusive_filename) return perf_config_from_file(fn, config_exclusive_filename, data); if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { - ret += perf_config_from_file(fn, perf_etc_perfconfig(), - data); - found += 1; + if (perf_config_from_file(fn, perf_etc_perfconfig(), data) < 0) + goto out; } home = getenv("HOME"); @@ -514,14 +514,12 @@ int perf_config(config_fn_t fn, void *data) if (!st.st_size) goto out_free; - ret += perf_config_from_file(fn, user_config, data); - found += 1; + ret = perf_config_from_file(fn, user_config, data); + out_free: free(user_config); } out: - if (found == 0) - return -1; return ret; } @@ -609,8 +607,12 @@ static int collect_config(const char *var, const char *value, struct perf_config_section *section = NULL; struct perf_config_item *item = NULL; struct perf_config_set *set = perf_config_set; - struct list_head *sections = &set->sections; + struct list_head *sections; + + if (set == NULL) + return -1; + sections = &set->sections; key = ptr = strdup(var); if (!key) { pr_debug("%s: strdup failed\n", __func__); @@ -641,17 +643,64 @@ static int collect_config(const char *var, const char *value, out_free: free(key); - perf_config_set__delete(set); return -1; } +static int perf_config_set__init(struct perf_config_set *set) +{ + int ret = -1; + const char *home = NULL; + + /* Setting $PERF_CONFIG makes perf read _only_ the given config file. 
*/ + if (config_exclusive_filename) + return perf_config_from_file(collect_config, config_exclusive_filename, set); + if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { + if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0) + goto out; + } + + home = getenv("HOME"); + if (perf_config_global() && home) { + char *user_config = strdup(mkpath("%s/.perfconfig", home)); + struct stat st; + + if (user_config == NULL) { + warning("Not enough memory to process %s/.perfconfig, " + "ignoring it.", home); + goto out; + } + + if (stat(user_config, &st) < 0) + goto out_free; + + if (st.st_uid && (st.st_uid != geteuid())) { + warning("File %s not owned by current user or root, " + "ignoring it.", user_config); + goto out_free; + } + + if (!st.st_size) + goto out_free; + + ret = perf_config_from_file(collect_config, user_config, set); + +out_free: + free(user_config); + } +out: + return ret; +} + struct perf_config_set *perf_config_set__new(void) { struct perf_config_set *set = zalloc(sizeof(*set)); if (set) { INIT_LIST_HEAD(&set->sections); - perf_config(collect_config, set); + if (perf_config_set__init(set) < 0) { + perf_config_set__delete(set); + set = NULL; + } } return set; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index bbf69d248ec5..9f53020c3269 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -204,6 +204,44 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si return (value_int & value_mask) | ~value_mask; } +static int string_set_value(struct bt_ctf_field *field, const char *string) +{ + char *buffer = NULL; + size_t len = strlen(string), i, p; + int err; + + for (i = p = 0; i < len; i++, p++) { + if (isprint(string[i])) { + if (!buffer) + continue; + buffer[p] = string[i]; + } else { + char numstr[5]; + + snprintf(numstr, sizeof(numstr), "\\x%02x", + (unsigned int)(string[i]) & 0xff); + + if (!buffer) { + buffer = zalloc(i + (len - i) * 4 + 2); + if (!buffer) { + pr_err("failed to set unprintable string '%s'\n", string); + return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING"); + } + if (i > 0) + strncpy(buffer, string, i); + } + strncat(buffer + p, numstr, 4); + p += 3; + } + } + + if (!buffer) + return bt_ctf_field_string_set_value(field, string); + err = bt_ctf_field_string_set_value(field, buffer); + free(buffer); + return err; +} + static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, @@ -270,8 +308,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } if (flags & FIELD_IS_STRING) - ret = bt_ctf_field_string_set_value(field, - data + offset + i * len); + ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index c9a6dc173e74..b0c2b5c5d337 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -233,17 +233,6 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym, return 0; } -static struct thread *get_main_thread(struct machine *machine, struct thread *thread) -{ - if (thread->pid_ == thread->tid) - return thread__get(thread); - - if (thread->pid_ == -1) - return NULL; - - return machine__find_thread(machine, thread->pid_, thread->pid_); -} - static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, u64 *dso_db_id, u64 *sym_db_id, u64 *offset) { @@ -382,7 +371,7 @@ int 
db_export__sample(struct db_export *dbe, union perf_event *event, if (err) return err; - main_thread = get_main_thread(al->machine, thread); + main_thread = thread__main_thread(al->machine, thread); if (main_thread) comm = machine__thread_exec_comm(al->machine, main_thread); diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 0953280629cf..76d79d070e21 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -349,6 +349,11 @@ static inline bool dso__is_kcore(struct dso *dso) dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE; } +static inline bool dso__is_kallsyms(struct dso *dso) +{ + return dso->kernel && dso->long_name[0] != '/'; +} + void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f6fcc6832949..9b141f12329e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -673,6 +673,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; + if (symbol_conf.kptr_restrict) + return -1; if (map == NULL) return -1; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e82ba90cc969..1b918aa075d6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -462,9 +462,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) return 0; } -static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx) +static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent) { - int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP); + int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); /* * Save the idx so that when we filter out fds POLLHUP'ed we can * close the associated evlist->mmap[] entry. 
@@ -480,10 +480,11 @@ static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) { - return __perf_evlist__add_pollfd(evlist, fd, -1); + return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN); } -static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd) +static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, + void *arg __maybe_unused) { struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd); @@ -493,7 +494,7 @@ static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd) int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) { return fdarray__filter(&evlist->pollfd, revents_and_mask, - perf_evlist__munmap_filtered); + perf_evlist__munmap_filtered, NULL); } int perf_evlist__poll(struct perf_evlist *evlist, int timeout) @@ -777,7 +778,7 @@ broken_event: return event; } -union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; u64 head; @@ -832,6 +833,13 @@ perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) return perf_mmap__read(md, false, start, end, &md->prev); } +union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) +{ + if (!evlist->backward) + return perf_evlist__mmap_read_forward(evlist, idx); + return perf_evlist__mmap_read_backward(evlist, idx); +} + void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; @@ -856,9 +864,11 @@ static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) { - BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0); + struct perf_mmap *md = &evlist->mmap[idx]; + + BUG_ON(md->base && atomic_read(&md->refcnt) == 0); - if (atomic_dec_and_test(&evlist->mmap[idx].refcnt)) + if (atomic_dec_and_test(&md->refcnt)) __perf_evlist__munmap(evlist, idx); } @@ -936,9 +946,12 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) if (cpu_map__empty(evlist->cpus)) evlist->nr_mmaps = thread_map__nr(evlist->threads); evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); + if (!evlist->mmap) + return -ENOMEM; + for (i = 0; i < evlist->nr_mmaps; i++) evlist->mmap[i].fd = -1; - return evlist->mmap != NULL ? 0 : -ENOMEM; + return 0; } struct mmap_params { @@ -983,15 +996,28 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, return 0; } +static bool +perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, + struct perf_evsel *evsel) +{ + if (evsel->overwrite) + return false; + return true; +} + static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu, int thread, int *output) { struct perf_evsel *evsel; + int revent; evlist__for_each(evlist, evsel) { int fd; + if (evsel->overwrite != (evlist->overwrite && evlist->backward)) + continue; + if (evsel->system_wide && thread) continue; @@ -1008,6 +1034,8 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, perf_evlist__mmap_get(evlist, idx); } + revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; + /* * The system_wide flag causes a selected event to be opened * always without a pid. 
Consequently it will never get a @@ -1016,7 +1044,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, * Therefore don't add it for polling. */ if (!evsel->system_wide && - __perf_evlist__add_pollfd(evlist, fd, idx) < 0) { + __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) { perf_evlist__mmap_put(evlist, idx); return -1; } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index d740fb877ab6..68cb1361c97c 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -131,6 +131,8 @@ struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx); +union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, + int idx); union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx); void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 5d7037ef7d3b..9b2e3e624efe 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -572,6 +572,8 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, perf_evsel__set_sample_bit(evsel, CALLCHAIN); + attr->sample_max_stack = param->max_stack; + if (param->record_mode == CALLCHAIN_LBR) { if (!opts->branch_stack) { if (attr->exclude_user) { @@ -635,7 +637,8 @@ static void apply_config_terms(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct callchain_param param; u32 dump_size = 0; - char *callgraph_buf = NULL; + int max_stack = 0; + const char *callgraph_buf = NULL; /* callgraph default */ param.record_mode = callchain_param.record_mode; @@ -662,6 +665,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_STACK_USER: dump_size = term->val.stack_user; break; + case PERF_EVSEL__CONFIG_TERM_MAX_STACK: + max_stack = term->val.max_stack; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by @@ -677,7 +683,12 @@ static void apply_config_terms(struct perf_evsel *evsel, } /* User explicitly set per-event callgraph, clear the old setting and reset. 
*/ - if ((callgraph_buf != NULL) || (dump_size > 0)) { + if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { + if (max_stack) { + param.max_stack = max_stack; + if (callgraph_buf == NULL) + callgraph_buf = "fp"; + } /* parse callgraph parameters */ if (callgraph_buf != NULL) { @@ -1329,6 +1340,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(clockid, p_signed); PRINT_ATTRf(sample_regs_intr, p_hex); PRINT_ATTRf(aux_watermark, p_unsigned); + PRINT_ATTRf(sample_max_stack, p_unsigned); return ret; } @@ -2239,17 +2251,11 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, return sample->raw_data + offset; } -u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, - const char *name) +u64 format_field__intval(struct format_field *field, struct perf_sample *sample, + bool needs_swap) { - struct format_field *field = perf_evsel__field(evsel, name); - void *ptr; u64 value; - - if (!field) - return 0; - - ptr = sample->raw_data + field->offset; + void *ptr = sample->raw_data + field->offset; switch (field->size) { case 1: @@ -2267,7 +2273,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, return 0; } - if (!evsel->needs_swap) + if (!needs_swap) return value; switch (field->size) { @@ -2284,6 +2290,17 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, return 0; } +u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, + const char *name) +{ + struct format_field *field = perf_evsel__field(evsel, name); + + if (!field) + return 0; + + return format_field__intval(field, sample, evsel->needs_swap); +} + bool perf_evsel__fallback(struct perf_evsel *evsel, int err, char *msg, size_t msgsize) { @@ -2372,6 +2389,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "No such device - did you specify an out-of-range profile CPU?"); break; case EOPNOTSUPP: + if (evsel->attr.sample_period != 0) + return scnprintf(msg, size, "%s", + "PMU Hardware doesn't support sampling/overflow-interrupts."); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. 
Try removing 'p' modifier."); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c1f10159804c..828ddd1c8947 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -44,6 +44,7 @@ enum { PERF_EVSEL__CONFIG_TERM_CALLGRAPH, PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, + PERF_EVSEL__CONFIG_TERM_MAX_STACK, PERF_EVSEL__CONFIG_TERM_MAX, }; @@ -56,6 +57,7 @@ struct perf_evsel_config_term { bool time; char *callgraph; u64 stack_user; + int max_stack; bool inherit; } val; }; @@ -259,6 +261,8 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel, struct format_field; +u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap); + struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); #define perf_evsel__match(evsel, t, c) \ diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h new file mode 100644 index 000000000000..116debe7a995 --- /dev/null +++ b/tools/perf/util/group.h @@ -0,0 +1,7 @@ +#ifndef GROUP_H +#define GROUP_H 1 + +bool arch_topdown_check_group(bool *warn); +void arch_topdown_group_warn(void); + +#endif diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c new file mode 100644 index 000000000000..4fb5395669f8 --- /dev/null +++ b/tools/perf/util/libunwind/arm64.c @@ -0,0 +1,35 @@ +/* + * This file sets up defines to compile the arch-specific binary from the + * generic one. + * + * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to the arch + * name, and the definition of this function is included directly from + * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function + * is defined no matter what arch the host is. + * + * Finally, the arch-specific unwind methods are exported, and will + * be assigned to each arm64 thread. + */ + +#define REMOTE_UNWIND_LIBUNWIND + +#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum) + +#include "unwind.h" +#include "debug.h" +#include "libunwind-aarch64.h" +#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h> +#include "../../arch/arm64/util/unwind-libunwind.c" + +/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind, + * assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it for compiling arm64 + * unwind methods. + */ +#undef NO_LIBUNWIND_DEBUG_FRAME +#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64 +#define NO_LIBUNWIND_DEBUG_FRAME +#endif +#include "util/unwind-libunwind-local.c" + +struct unwind_libunwind_ops * +arm64_unwind_libunwind_ops = &_unwind_libunwind_ops; diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c new file mode 100644 index 000000000000..d98c17e19a2b --- /dev/null +++ b/tools/perf/util/libunwind/x86_32.c @@ -0,0 +1,37 @@ +/* + * This file sets up defines to compile the arch-specific binary from the + * generic one. + * + * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to the arch + * name, and the definition of this function is included directly from + * 'arch/x86/util/unwind-libunwind.c', to make sure that this function + * is defined no matter what arch the host is. + * + * Finally, the arch-specific unwind methods are exported, and will + * be assigned to each x86 thread. 
+ */ + +#define REMOTE_UNWIND_LIBUNWIND +#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum) + +#include "unwind.h" +#include "debug.h" +#include "libunwind-x86.h" +#include <../../../../arch/x86/include/uapi/asm/perf_regs.h> + +/* HAVE_ARCH_X86_64_SUPPORT is used in 'arch/x86/util/unwind-libunwind.c'; + * for x86_32, we undef it to compile code for x86_32 only. + */ +#undef HAVE_ARCH_X86_64_SUPPORT +#include "../../arch/x86/util/unwind-libunwind.c" + +/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no + * dwarf_find_debug_frame() function. + */ +#ifndef NO_LIBUNWIND_DEBUG_FRAME +#define NO_LIBUNWIND_DEBUG_FRAME +#endif +#include "util/unwind-libunwind-local.c" + +struct unwind_libunwind_ops * +x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b1772180c820..a0c186acb1f3 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1353,11 +1353,16 @@ int machine__process_mmap2_event(struct machine *machine, if (map == NULL) goto out_problem_map; - thread__insert_map(thread, map); + ret = thread__insert_map(thread, map); + if (ret) + goto out_problem_insert; + thread__put(thread); map__put(map); return 0; +out_problem_insert: + map__put(map); out_problem_map: thread__put(thread); out_problem: @@ -1403,11 +1408,16 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (map == NULL) goto out_problem_map; - thread__insert_map(thread, map); + ret = thread__insert_map(thread, map); + if (ret) + goto out_problem_insert; + thread__put(thread); map__put(map); return 0; +out_problem_insert: + map__put(map); out_problem_map: thread__put(thread); out_problem: diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c6fd0479f4cd..d15e335842b7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -900,6 +900,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", + [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", }; static bool config_term_shrinked; @@ -995,6 +996,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1040,6 +1044,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_STACKSIZE: case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: return config_term_common(attr, term, err); default: if (err) { @@ -1109,6 +1114,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NOINHERIT: ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 
0 : 1); break; + case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); + break; default: break; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index d740c3ca9a1d..46c05ccd5dfe 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -68,6 +68,7 @@ enum { PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, + PARSE_EVENTS__TERM_TYPE_MAX_STACK, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 1477fbc78993..3c15b33b2e84 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -199,6 +199,7 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } +max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } @@ -259,6 +260,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); } cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); } mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); } mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); } +topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); } L1-dcache|l1-d|l1d|L1-data | L1-icache|l1-i|l1i|L1-instruction |
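The parser hooks above let a single event carry its own callchain depth via a 'max-stack=N' term in the event spec. For context, a minimal sketch of where such a value ends up, assuming a current UAPI header; sample_max_stack is the 16-bit perf_event_attr field that the session.c hunk just below starts byte-swapping:

#include <linux/perf_event.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr = { .size = sizeof(attr) };
	int term_val = 3;	/* e.g. parsed from a 'max-stack=3' term */

	/* __u16 field, which is why bswap_field_16() is added below */
	attr.sample_max_stack = term_val;
	printf("sample_max_stack = %u\n", attr.sample_max_stack);
	return 0;
}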
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5214974e841a..dfedf097b9b1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -593,6 +593,7 @@ do { \ if (bswap_safe(f, 0)) \ attr->f = bswap_##sz(attr->f); \ } while(0) +#define bswap_field_16(f) bswap_field(f, 16) #define bswap_field_32(f) bswap_field(f, 32) #define bswap_field_64(f) bswap_field(f, 64) @@ -608,6 +609,7 @@ do { \ bswap_field_64(sample_regs_user); bswap_field_32(sample_stack_user); bswap_field_32(aux_watermark); + bswap_field_16(sample_max_stack); /* * After read_format are bitfields. Check read_format because diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index aa9efe08762b..8a2bbd2a4d82 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; static bool have_frontend_stalled; struct stats walltime_nsecs_stats; @@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void) sizeof(runtime_transaction_stats)); memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); + memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); + memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); + memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); + memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); + memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); } /* @@ -105,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); else if (perf_stat_evsel__is(counter, ELISION_START)) update_stats(&runtime_elision_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) + update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) + update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) + update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) + update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) + update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) @@ -302,6 +322,107 @@ static void print_ll_cache_misses(int cpu, out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } +/* + * High-level "TopDown" CPU core pipeline bottleneck breakdown. + * + * Based on Ahmad Yasin, "A Top-Down Method for Performance Analysis and + * Counters Architecture", ISPASS 2014. + * + * The CPU pipeline is divided into 4 areas that can be bottlenecks: + * + * Frontend -> Backend -> Retiring + * BadSpeculation in addition covers out-of-order execution that is thrown + * away (for example on branch mispredictions). + * Frontend is instruction decoding. + * Backend is execution, like computation and accessing data in memory. + * Retiring is good execution that is not directly bottlenecked. + * + * The formulas are computed in slots. + * A slot is one entry in the pipeline for each unit of pipeline width + * (for example, a 4-wide pipeline has 4 slots per cycle). + * + * Formulas: + * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) / + * TotalSlots + * Retiring = SlotsRetired / TotalSlots + * FrontendBound = FetchBubbles / TotalSlots + * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound + * + * The kernel provides the mapping to the low level CPU events and any scaling + * needed for the CPU pipeline width, for example: + * + * TotalSlots = Cycles * 4 + * + * The scaling factor is communicated in the sysfs unit. + * + * In some cases the CPU may not be able to measure all the formulas due to + * missing events. In that case multiple formulas are combined where possible. + * + * Full TopDown supports more levels to sub-divide each area: for example + * BackendBound into compute bound and memory bound. For now we only + * support Level 1 TopDown. + */
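To make the Level 1 arithmetic above concrete, here is a small self-contained example applying the four formulas to made-up counts (the values and the 4-wide assumption are illustrative only):

#include <stdio.h>

int main(void)
{
	/* hypothetical raw counts for one measurement interval */
	double total_slots      = 4000.0;  /* cycles * pipeline width (4) */
	double slots_issued     = 3000.0;
	double slots_retired    = 2400.0;
	double recovery_bubbles = 100.0;
	double fetch_bubbles    = 400.0;

	double bad_spec = ((slots_issued - slots_retired) + recovery_bubbles)
			  / total_slots;                        /* 0.175 */
	double retiring = slots_retired / total_slots;          /* 0.600 */
	double fe_bound = fetch_bubbles / total_slots;          /* 0.100 */
	double be_bound = 1.0 - bad_spec - retiring - fe_bound; /* 0.125 */

	printf("retiring %.1f%%, bad speculation %.1f%%, "
	       "frontend %.1f%%, backend %.1f%%\n",
	       retiring * 100., bad_spec * 100.,
	       fe_bound * 100., be_bound * 100.);
	return 0;
}

The four fractions sum to 1.0 by construction, which is why td_be_bound() below can be computed as the remainder, clamped by sanitize_val() against small measurement skew.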
+static double sanitize_val(double x) +{ + if (x < 0 && x >= -0.02) + return 0.0; + return x; +} + +static double td_total_slots(int ctx, int cpu) +{ + return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); +} + +static double td_bad_spec(int ctx, int cpu) +{ + double bad_spec = 0; + double total_slots; + double total; + + total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - + avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + + avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); + total_slots = td_total_slots(ctx, cpu); + if (total_slots) + bad_spec = total / total_slots; + return sanitize_val(bad_spec); +} + +static double td_retiring(int ctx, int cpu) +{ + double retiring = 0; + double total_slots = td_total_slots(ctx, cpu); + double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + + if (total_slots) + retiring = ret_slots / total_slots; + return retiring; +} + +static double td_fe_bound(int ctx, int cpu) +{ + double fe_bound = 0; + double total_slots = td_total_slots(ctx, cpu); + double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + + if (total_slots) + fe_bound = fetch_bub / total_slots; + return fe_bound; +} + +static double td_be_bound(int ctx, int cpu) +{ + double sum = (td_fe_bound(ctx, cpu) + + td_bad_spec(ctx, cpu) + + td_retiring(ctx, cpu)); + if (sum == 0) + return 0; + return sanitize_val(1.0 - sum); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out) @@ -309,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; + const char *color = NULL; int ctx = evsel_context(evsel); if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { @@ -452,6 +574,46 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, avg / ratio); else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { + double fe_bound = td_fe_bound(ctx, cpu); + + if (fe_bound > 0.2) + color = PERF_COLOR_RED; + print_metric(ctxp, color, "%8.1f%%", "frontend bound", + fe_bound * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { + double retiring = td_retiring(ctx, cpu); + + if (retiring > 0.7) + color = PERF_COLOR_GREEN; + print_metric(ctxp, color, "%8.1f%%", "retiring", + retiring * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { + double bad_spec = td_bad_spec(ctx, cpu); + + if (bad_spec > 0.1) + color = 
PERF_COLOR_RED; + print_metric(ctxp, color, "%8.1f%%", "bad speculation", + bad_spec * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { + double be_bound = td_be_bound(ctx, cpu); + const char *name = "backend bound"; + static int have_recovery_bubbles = -1; + + /* In case the CPU does not support topdown-recovery-bubbles */ + if (have_recovery_bubbles < 0) + have_recovery_bubbles = pmu_have_event("cpu", + "topdown-recovery-bubbles"); + if (!have_recovery_bubbles) + name = "backend bound/bad spec"; + + if (be_bound > 0.2) + color = PERF_COLOR_RED; + if (td_total_slots(ctx, cpu) > 0) + print_metric(ctxp, color, "%8.1f%%", name, + be_bound * 100.); + else + print_metric(ctxp, NULL, NULL, name, 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index ffa1d0653861..c1ba255f2abe 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TRANSACTION_START, cpu/tx-start/), ID(ELISION_START, cpu/el-start/), ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), + ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots), + ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued), + ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), + ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), + ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), }; #undef ID diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 0150e786ccc7..c29bb94c48a4 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -17,6 +17,11 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TRANSACTION_START, PERF_STAT_EVSEL_ID__ELISION_START, PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP, + PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS, + PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED, + PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, + PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, + PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, PERF_STAT_EVSEL_ID__MAX, }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 20f9cb32b703..09c5c34ae38d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1641,6 +1641,20 @@ static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz) return ret; } +/* + * Use open(O_RDONLY) to check readability directly instead of access(R_OK), + * since access(R_OK) only checks with the real UID/GID, but open() uses the + * effective UID/GID and actual capabilities (e.g. /proc/kcore requires + * CAP_SYS_RAWIO). + */ +static bool filename__readable(const char *file) +{ + int fd = open(file, O_RDONLY); + if (fd < 0) + return false; + close(fd); + return true; +} + static char *dso__find_kallsyms(struct dso *dso, struct map *map) { u8 host_build_id[BUILD_ID_SIZE]; @@ -1660,58 +1674,43 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) sizeof(host_build_id)) == 0) is_host = dso__build_id_equal(dso, host_build_id); - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - - scnprintf(path, sizeof(path), "%s/%s/%s", buildid_dir, - DSO__NAME_KCORE, sbuild_id); - - /* Use /proc/kallsyms if possible */ + /* Try a fast path for /proc/kallsyms if possible */ if (is_host) { - DIR *d; - int fd; - - /* If no cached kcore go with /proc/kallsyms */ - d = opendir(path); - if (!d) - goto proc_kallsyms; - closedir(d); - /* - * Do not check the build-id cache, until we know we cannot use - * /proc/kcore. 
+ * Do not check the build-id cache, unless we know we cannot use + * /proc/kcore or the module maps don't match /proc/kallsyms. + * To check readability of /proc/kcore, do not use access(R_OK), + * since reading /proc/kcore requires CAP_SYS_RAWIO, which + * access() cannot check. */ - fd = open("/proc/kcore", O_RDONLY); - if (fd != -1) { - close(fd); - /* If module maps match go with /proc/kallsyms */ - if (!validate_kcore_addresses("/proc/kallsyms", map)) - goto proc_kallsyms; - } - - /* Find kallsyms in build-id cache with kcore */ - if (!find_matching_kcore(map, path, sizeof(path))) - return strdup(path); - - goto proc_kallsyms; + if (filename__readable("/proc/kcore") && + !validate_kcore_addresses("/proc/kallsyms", map)) + goto proc_kallsyms; } + build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + /* Find kallsyms in build-id cache with kcore */ + scnprintf(path, sizeof(path), "%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuild_id); + if (!find_matching_kcore(map, path, sizeof(path))) return strdup(path); - scnprintf(path, sizeof(path), "%s/%s/%s", - buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); + /* Use current /proc/kallsyms if possible */ + if (is_host) { +proc_kallsyms: + return strdup("/proc/kallsyms"); + } - if (access(path, F_OK)) { + /* Finally, find a cache of kallsyms */ + if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) { pr_err("No kallsyms or vmlinux with build-id %s was found\n", sbuild_id); return NULL; } return strdup(path); - -proc_kallsyms: - return strdup("/proc/kallsyms"); } static int dso__load_kernel_sym(struct dso *dso, struct map *map, @@ -1933,17 +1932,17 @@ int setup_intlist(struct intlist **list, const char *list_str, static bool symbol__read_kptr_restrict(void) { bool value = false; + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (geteuid() != 0) { - FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (fp != NULL) { - char line[8]; + if (fp != NULL) { + char line[8]; - if (fgets(line, sizeof(line), fp) != NULL) - value = atoi(line) != 0; + if (fgets(line, sizeof(line), fp) != NULL) + value = (geteuid() != 0) ? + (atoi(line) != 0) : + (atoi(line) == 2); - fclose(fp); - } + fclose(fp); } return value;
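The symbol.c rewrite above also makes root honour kptr_restrict: any non-zero value restricts non-root users, while root is only restricted by the value 2, which hides kernel pointers even from root. A standalone sketch of that decision (the helper name is ours, not the patch's):

#include <stdbool.h>
#include <stdio.h>

/* illustrative helper mirroring the geteuid()-dependent test above */
static bool kptr_is_restricted(int kptr_restrict, bool is_root)
{
	if (is_root)
		return kptr_restrict == 2;	/* only 2 hides ptrs from root */
	return kptr_restrict != 0;		/* 1 or 2 hides them otherwise */
}

int main(void)
{
	printf("root, kptr_restrict=1 -> %d\n", kptr_is_restricted(1, true));
	printf("user, kptr_restrict=1 -> %d\n", kptr_is_restricted(1, false));
	return 0;
}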
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 45fcb715a36b..f30f9566fddc 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -43,9 +43,6 @@ struct thread *thread__new(pid_t pid, pid_t tid) thread->cpu = -1; INIT_LIST_HEAD(&thread->comm_list); - if (unwind__prepare_access(thread) < 0) - goto err_thread; - comm_str = malloc(32); if (!comm_str) goto err_thread; @@ -201,10 +198,18 @@ size_t thread__fprintf(struct thread *thread, FILE *fp) map_groups__fprintf(thread->mg, fp); } -void thread__insert_map(struct thread *thread, struct map *map) +int thread__insert_map(struct thread *thread, struct map *map) { + int ret; + + ret = unwind__prepare_access(thread, map); + if (ret) + return ret; + map_groups__fixup_overlappings(thread->mg, map, stderr); map_groups__insert(thread->mg, map); + + return 0; } static int thread__clone_map_groups(struct thread *thread, @@ -265,3 +270,14 @@ void thread__find_cpumode_addr_location(struct thread *thread, break; } } + +struct thread *thread__main_thread(struct machine *machine, struct thread *thread) +{ + if (thread->pid_ == thread->tid) + return thread__get(thread); + + if (thread->pid_ == -1) + return NULL; + + return machine__find_thread(machine, thread->pid_, thread->pid_); +} diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 45fba13c800b..99263cb6e6b6 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -9,11 +9,9 @@ #include "symbol.h" #include <strlist.h> #include <intlist.h> -#ifdef HAVE_LIBUNWIND_SUPPORT -#include <libunwind.h> -#endif struct thread_stack; +struct unwind_libunwind_ops; struct thread { union { @@ -36,7 +34,8 @@ struct thread { void *priv; struct thread_stack *ts; #ifdef HAVE_LIBUNWIND_SUPPORT - unw_addr_space_t addr_space; + void *addr_space; + struct unwind_libunwind_ops *unwind_libunwind_ops; #endif }; @@ -77,10 +76,12 @@ int thread__comm_len(struct thread *thread); struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); -void thread__insert_map(struct thread *thread, struct map *map); +int thread__insert_map(struct thread *thread, struct map *map); int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); size_t thread__fprintf(struct thread *thread, FILE *fp); +struct thread *thread__main_thread(struct machine *machine, struct thread *thread); + void thread__find_addr_map(struct thread *thread, u8 cpumode, enum map_type type, u64 addr, struct addr_location *al); diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c new file mode 100644 index 000000000000..01c2e86977f4 --- /dev/null +++ b/tools/perf/util/unwind-libunwind-local.c @@ -0,0 +1,697 @@ +/* + * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. 
+ * + * Lots of this code has been borrowed from, or was heavily inspired by, parts + * of the libunwind 0.99 code, which are (amongst other contributors I may + * have forgotten): + * + * Copyright (C) 2002-2007 Hewlett-Packard Co + * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + * + * And the bugs have been added by: + * + * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com> + * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com> + * + */ + +#include <elf.h> +#include <gelf.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/list.h> +#ifndef REMOTE_UNWIND_LIBUNWIND +#include <libunwind.h> +#include <libunwind-ptrace.h> +#endif +#include "callchain.h" +#include "thread.h" +#include "session.h" +#include "perf_regs.h" +#include "unwind.h" +#include "symbol.h" +#include "util.h" +#include "debug.h" +#include "asm/bug.h" + +extern int +UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +extern int +UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, + unw_word_t ip, + unw_word_t segbase, + const char *obj_name, unw_word_t start, + unw_word_t end); + +#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) + +#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ +#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ + +/* Pointer-encoding formats: */ +#define DW_EH_PE_omit 0xff +#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ +#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ +#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ +#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ +#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ + +/* Pointer-encoding application: */ +#define DW_EH_PE_absptr 0x00 /* absolute value */ +#define DW_EH_PE_pcrel 0x10 /* rel. to addr. 
of encoded value */ + +/* + * The following are not documented by LSB v1.3, yet they are used by + * GCC, presumably they aren't documented by LSB since they aren't + * used on Linux: + */ +#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ +#define DW_EH_PE_aligned 0x50 /* aligned pointer */ + +/* Flags intentionally not handled, since they're not needed: + * #define DW_EH_PE_indirect 0x80 + * #define DW_EH_PE_uleb128 0x01 + * #define DW_EH_PE_udata2 0x02 + * #define DW_EH_PE_sleb128 0x09 + * #define DW_EH_PE_sdata2 0x0a + * #define DW_EH_PE_textrel 0x20 + * #define DW_EH_PE_datarel 0x30 + */ + +struct unwind_info { + struct perf_sample *sample; + struct machine *machine; + struct thread *thread; +}; + +#define dw_read(ptr, type, end) ({ \ + type *__p = (type *) ptr; \ + type __v; \ + if ((__p + 1) > (type *) end) \ + return -EINVAL; \ + __v = *__p++; \ + ptr = (typeof(ptr)) __p; \ + __v; \ + }) + +static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, + u8 encoding) +{ + u8 *cur = *p; + *val = 0; + + switch (encoding) { + case DW_EH_PE_omit: + *val = 0; + goto out; + case DW_EH_PE_ptr: + *val = dw_read(cur, unsigned long, end); + goto out; + default: + break; + } + + switch (encoding & DW_EH_PE_APPL_MASK) { + case DW_EH_PE_absptr: + break; + case DW_EH_PE_pcrel: + *val = (unsigned long) cur; + break; + default: + return -EINVAL; + } + + if ((encoding & 0x07) == 0x00) + encoding |= DW_EH_PE_udata4; + + switch (encoding & DW_EH_PE_FORMAT_MASK) { + case DW_EH_PE_sdata4: + *val += dw_read(cur, s32, end); + break; + case DW_EH_PE_udata4: + *val += dw_read(cur, u32, end); + break; + case DW_EH_PE_sdata8: + *val += dw_read(cur, s64, end); + break; + case DW_EH_PE_udata8: + *val += dw_read(cur, u64, end); + break; + default: + return -EINVAL; + } + + out: + *p = cur; + return 0; +} + +#define dw_read_encoded_value(ptr, end, enc) ({ \ + u64 __v; \ + if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ + return -EINVAL; \ + } \ + __v; \ + })
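To illustrate what __dw_read_encoded_value() computes for the common .eh_frame_hdr case, here is a tiny standalone decode of a DW_EH_PE_pcrel | DW_EH_PE_sdata4 pointer: the base is the address of the encoded bytes themselves plus a signed 32-bit displacement (the values are illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint8_t buf[4];
	int32_t disp = -8;			/* sdata4 payload */

	memcpy(buf, &disp, sizeof(disp));

	/* pcrel application: base is the address of the encoded value */
	uint64_t base = (uint64_t)(uintptr_t)buf;
	int32_t v;

	memcpy(&v, buf, sizeof(v));
	printf("decoded target = %#llx (base %#llx %+d)\n",
	       (unsigned long long)(base + v), (unsigned long long)base, v);
	return 0;
}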
+static u64 elf_section_offset(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + u64 offset = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + + do { + if (gelf_getehdr(elf, &ehdr) == NULL) + break; + + if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) + break; + + offset = shdr.sh_offset; + } while (0); + + elf_end(elf); + return offset; +} + +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int elf_is_exec(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + int retval = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out; + + retval = (ehdr.e_type == ET_EXEC); + +out: + elf_end(elf); + pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval); + return retval; +} +#endif + +struct table_entry { + u32 start_ip_offset; + u32 fde_offset; +}; + +struct eh_frame_hdr { + unsigned char version; + unsigned char eh_frame_ptr_enc; + unsigned char fde_count_enc; + unsigned char table_enc; + + /* + * The rest of the header is variable-length and consists of the + * following members: + * + * encoded_t eh_frame_ptr; + * encoded_t fde_count; + */ + + /* A single encoded pointer should not be more than 8 bytes. */ + u64 enc[2]; + + /* + * struct { + * encoded_t start_ip; + * encoded_t fde_addr; + * } binary_search_table[fde_count]; + */ + char data[0]; +} __packed; + +static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, + u64 offset, u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + struct eh_frame_hdr hdr; + u8 *enc = (u8 *) &hdr.enc; + u8 *end = (u8 *) &hdr.data; + ssize_t r; + + r = dso__data_read_offset(dso, machine, offset, + (u8 *) &hdr, sizeof(hdr)); + if (r != sizeof(hdr)) + return -EINVAL; + + /* We don't need eh_frame_ptr, just skip it. */ + dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); + + *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); + *segbase = offset; + *table_data = (enc - (u8 *) &hdr) + offset; + return 0; +} + +static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, + u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + int ret = -EINVAL, fd; + u64 offset = dso->data.eh_frame_hdr_offset; + + if (offset == 0) { + fd = dso__data_get_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + /* Check the .eh_frame section for unwinding info */ + offset = elf_section_offset(fd, ".eh_frame_hdr"); + dso->data.eh_frame_hdr_offset = offset; + dso__data_put_fd(dso); + } + + if (offset) + ret = unwind_spec_ehframe(dso, machine, offset, + table_data, segbase, + fde_count); + + return ret; +} + +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int read_unwind_spec_debug_frame(struct dso *dso, + struct machine *machine, u64 *offset) +{ + int fd; + u64 ofs = dso->data.debug_frame_offset; + + if (ofs == 0) { + fd = dso__data_get_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + /* Check the .debug_frame section for unwinding info */ + ofs = elf_section_offset(fd, ".debug_frame"); + dso->data.debug_frame_offset = ofs; + dso__data_put_fd(dso); + } + + *offset = ofs; + if (*offset) + return 0; + + return -EINVAL; +} +#endif + +static struct map *find_map(unw_word_t ip, struct unwind_info *ui) +{ + struct addr_location al; + + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + if (!al.map) { + /* + * We've seen cases (softice) where the DWARF unwinder went + * through non-executable mmaps, which we need to look up + * in the MAP__VARIABLE tree. 
+ */ + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, + MAP__VARIABLE, ip, &al); + } + return al.map; +} + +static int +find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, + int need_unwind_info, void *arg) +{ + struct unwind_info *ui = arg; + struct map *map; + unw_dyn_info_t di; + u64 table_data, segbase, fde_count; + int ret = -EINVAL; + + map = find_map(ip, ui); + if (!map || !map->dso) + return -EINVAL; + + pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); + + /* Check the .eh_frame section for unwinding info */ + if (!read_unwind_spec_eh_frame(map->dso, ui->machine, + &table_data, &segbase, &fde_count)) { + memset(&di, 0, sizeof(di)); + di.format = UNW_INFO_FORMAT_REMOTE_TABLE; + di.start_ip = map->start; + di.end_ip = map->end; + di.u.rti.segbase = map->start + segbase; + di.u.rti.table_data = map->start + table_data; + di.u.rti.table_len = fde_count * sizeof(struct table_entry) + / sizeof(unw_word_t); + ret = dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); + } + +#ifndef NO_LIBUNWIND_DEBUG_FRAME + /* Check the .debug_frame section for unwinding info */ + if (ret < 0 && + !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { + int fd = dso__data_get_fd(map->dso, ui->machine); + int is_exec = elf_is_exec(fd, map->dso->name); + unw_word_t base = is_exec ? 0 : map->start; + const char *symfile; + + if (fd >= 0) + dso__data_put_fd(map->dso); + + symfile = map->dso->symsrc_filename ?: map->dso->name; + + memset(&di, 0, sizeof(di)); + if (dwarf_find_debug_frame(0, &di, ip, base, symfile, + map->start, map->end)) + return dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); + } +#endif + + return ret; +} + +static int access_fpreg(unw_addr_space_t __maybe_unused as, + unw_regnum_t __maybe_unused num, + unw_fpreg_t __maybe_unused *val, + int __maybe_unused __write, + void __maybe_unused *arg) +{ + pr_err("unwind: access_fpreg unsupported\n"); + return -UNW_EINVAL; +} + +static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused *dil_addr, + void __maybe_unused *arg) +{ + return -UNW_ENOINFO; +} + +static int resume(unw_addr_space_t __maybe_unused as, + unw_cursor_t __maybe_unused *cu, + void __maybe_unused *arg) +{ + pr_err("unwind: resume unsupported\n"); + return -UNW_EINVAL; +} + +static int +get_proc_name(unw_addr_space_t __maybe_unused as, + unw_word_t __maybe_unused addr, + char __maybe_unused *bufp, size_t __maybe_unused buf_len, + unw_word_t __maybe_unused *offp, void __maybe_unused *arg) +{ + pr_err("unwind: get_proc_name unsupported\n"); + return -UNW_EINVAL; +} + +static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, + unw_word_t *data) +{ + struct map *map; + ssize_t size; + + map = find_map(addr, ui); + if (!map) { + pr_debug("unwind: no map for %lx\n", (unsigned long)addr); + return -1; + } + + if (!map->dso) + return -1; + + size = dso__data_read_addr(map->dso, map, ui->machine, + addr, (u8 *) data, sizeof(*data)); + + return !(size == sizeof(*data)); +} + +static int access_mem(unw_addr_space_t __maybe_unused as, + unw_word_t addr, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + struct stack_dump *stack = &ui->sample->user_stack; + u64 start, end; + int offset; + int ret; + + /* Don't support write, probably not needed. 
*/ + if (__write || !stack || !ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); + if (ret) + return ret; + + end = start + stack->size; + + /* Check overflow. */ + if (addr + sizeof(unw_word_t) < addr) + return -EINVAL; + + if (addr < start || addr + sizeof(unw_word_t) >= end) { + ret = access_dso_mem(ui, addr, valp); + if (ret) { + pr_debug("unwind: access_mem %p not inside range" + " 0x%" PRIx64 "-0x%" PRIx64 "\n", + (void *) (uintptr_t) addr, start, end); + *valp = 0; + return ret; + } + return 0; + } + + offset = addr - start; + *valp = *(unw_word_t *)&stack->data[offset]; + pr_debug("unwind: access_mem addr %p val %lx, offset %d\n", + (void *) (uintptr_t) addr, (unsigned long)*valp, offset); + return 0; +} + +static int access_reg(unw_addr_space_t __maybe_unused as, + unw_regnum_t regnum, unw_word_t *valp, + int __write, void *arg) +{ + struct unwind_info *ui = arg; + int id, ret; + u64 val; + + /* Don't support write, I suspect we don't need it. */ + if (__write) { + pr_err("unwind: access_reg w %d\n", regnum); + return 0; + } + + if (!ui->sample->user_regs.regs) { + *valp = 0; + return 0; + } + + id = LIBUNWIND__ARCH_REG_ID(regnum); + if (id < 0) + return -EINVAL; + + ret = perf_reg_value(&val, &ui->sample->user_regs, id); + if (ret) { + pr_err("unwind: can't read reg %d\n", regnum); + return ret; + } + + *valp = (unw_word_t) val; + pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); + return 0; +} + +static void put_unwind_info(unw_addr_space_t __maybe_unused as, + unw_proc_info_t *pi __maybe_unused, + void *arg __maybe_unused) +{ + pr_debug("unwind: put_unwind_info called\n"); +} + +static int entry(u64 ip, struct thread *thread, + unwind_entry_cb_t cb, void *arg) +{ + struct unwind_entry e; + struct addr_location al; + + thread__find_addr_location(thread, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + + e.ip = ip; + e.map = al.map; + e.sym = al.sym; + + pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", + al.sym ? al.sym->name : "''", + ip, + al.map ? 
al.map->map_ip(al.map, ip) : (u64) 0); + + return cb(&e, arg); +} + +static void display_error(int err) +{ + switch (err) { + case UNW_EINVAL: + pr_err("unwind: Only supports local.\n"); + break; + case UNW_EUNSPEC: + pr_err("unwind: Unspecified error.\n"); + break; + case UNW_EBADREG: + pr_err("unwind: Register unavailable.\n"); + break; + default: + break; + } +} + +static unw_accessors_t accessors = { + .find_proc_info = find_proc_info, + .put_unwind_info = put_unwind_info, + .get_dyn_info_list_addr = get_dyn_info_list_addr, + .access_mem = access_mem, + .access_reg = access_reg, + .access_fpreg = access_fpreg, + .resume = resume, + .get_proc_name = get_proc_name, +}; + +static int _unwind__prepare_access(struct thread *thread) +{ + if (callchain_param.record_mode != CALLCHAIN_DWARF) + return 0; + + thread->addr_space = unw_create_addr_space(&accessors, 0); + if (!thread->addr_space) { + pr_err("unwind: Can't create unwind address space.\n"); + return -ENOMEM; + } + + unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL); + return 0; +} + +static void _unwind__flush_access(struct thread *thread) +{ + if (callchain_param.record_mode != CALLCHAIN_DWARF) + return; + + unw_flush_cache(thread->addr_space, 0, 0); +} + +static void _unwind__finish_access(struct thread *thread) +{ + if (callchain_param.record_mode != CALLCHAIN_DWARF) + return; + + unw_destroy_addr_space(thread->addr_space); +} + +static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, + void *arg, int max_stack) +{ + u64 val; + unw_word_t ips[max_stack]; + unw_addr_space_t addr_space; + unw_cursor_t c; + int ret, i = 0; + + ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP); + if (ret) + return ret; + + ips[i++] = (unw_word_t) val; + + /* + * If we need more than one entry, do the DWARF + * unwind itself. + */ + if (max_stack - 1 > 0) { + WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL"); + addr_space = ui->thread->addr_space; + + if (addr_space == NULL) + return -1; + + ret = unw_init_remote(&c, addr_space, ui); + if (ret) + display_error(ret); + + while (!ret && (unw_step(&c) > 0) && i < max_stack) { + unw_get_reg(&c, UNW_REG_IP, &ips[i]); + ++i; + } + + max_stack = i; + } + + /* + * Display what we got based on the order setup. + */ + for (i = 0; i < max_stack && !ret; i++) { + int j = i; + + if (callchain_param.order == ORDER_CALLER) + j = max_stack - i - 1; + ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0; + } + + return ret; +} + +static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct thread *thread, + struct perf_sample *data, int max_stack) +{ + struct unwind_info ui = { + .sample = data, + .thread = thread, + .machine = thread->mg->machine, + }; + + if (!data->user_regs.regs) + return -EINVAL; + + if (max_stack <= 0) + return -EINVAL; + + return get_entries(&ui, cb, arg, max_stack); +} + +static struct unwind_libunwind_ops +_unwind_libunwind_ops = { + .prepare_access = _unwind__prepare_access, + .flush_access = _unwind__flush_access, + .finish_access = _unwind__finish_access, + .get_entries = _unwind__get_entries, +}; + +#ifndef REMOTE_UNWIND_LIBUNWIND +struct unwind_libunwind_ops * +local_unwind_libunwind_ops = &_unwind_libunwind_ops; +#endif diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 63687d3a344e..854711966cad 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -1,682 +1,76 @@ -/* - * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. 
- * - * Lots of this code have been borrowed or heavily inspired from parts of - * the libunwind 0.99 code which are (amongst other contributors I may have - * forgotten): - * - * Copyright (C) 2002-2007 Hewlett-Packard Co - * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> - * - * And the bugs have been added by: - * - * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com> - * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com> - * - */ - -#include <elf.h> -#include <gelf.h> -#include <fcntl.h> -#include <string.h> -#include <unistd.h> -#include <sys/mman.h> -#include <linux/list.h> -#include <libunwind.h> -#include <libunwind-ptrace.h> -#include "callchain.h" +#include "unwind.h" #include "thread.h" #include "session.h" -#include "perf_regs.h" -#include "unwind.h" -#include "symbol.h" -#include "util.h" #include "debug.h" -#include "asm/bug.h" - -extern int -UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, - unw_word_t ip, - unw_dyn_info_t *di, - unw_proc_info_t *pi, - int need_unwind_info, void *arg); - -#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) - -extern int -UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, - unw_word_t ip, - unw_word_t segbase, - const char *obj_name, unw_word_t start, - unw_word_t end); - -#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame) - -#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ -#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ - -/* Pointer-encoding formats: */ -#define DW_EH_PE_omit 0xff -#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ -#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ -#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ -#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ -#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ - -/* Pointer-encoding application: */ -#define DW_EH_PE_absptr 0x00 /* absolute value */ -#define DW_EH_PE_pcrel 0x10 /* rel. to addr. 
of encoded value */ - -/* - * The following are not documented by LSB v1.3, yet they are used by - * GCC, presumably they aren't documented by LSB since they aren't - * used on Linux: - */ -#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ -#define DW_EH_PE_aligned 0x50 /* aligned pointer */ +#include "arch/common.h" -/* Flags intentionaly not handled, since they're not needed: - * #define DW_EH_PE_indirect 0x80 - * #define DW_EH_PE_uleb128 0x01 - * #define DW_EH_PE_udata2 0x02 - * #define DW_EH_PE_sleb128 0x09 - * #define DW_EH_PE_sdata2 0x0a - * #define DW_EH_PE_textrel 0x20 - * #define DW_EH_PE_datarel 0x30 - */ +struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; +struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; +struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops; -struct unwind_info { - struct perf_sample *sample; - struct machine *machine; - struct thread *thread; -}; - -#define dw_read(ptr, type, end) ({ \ - type *__p = (type *) ptr; \ - type __v; \ - if ((__p + 1) > (type *) end) \ - return -EINVAL; \ - __v = *__p++; \ - ptr = (typeof(ptr)) __p; \ - __v; \ - }) - -static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, - u8 encoding) +static void unwind__register_ops(struct thread *thread, + struct unwind_libunwind_ops *ops) { - u8 *cur = *p; - *val = 0; - - switch (encoding) { - case DW_EH_PE_omit: - *val = 0; - goto out; - case DW_EH_PE_ptr: - *val = dw_read(cur, unsigned long, end); - goto out; - default: - break; - } - - switch (encoding & DW_EH_PE_APPL_MASK) { - case DW_EH_PE_absptr: - break; - case DW_EH_PE_pcrel: - *val = (unsigned long) cur; - break; - default: - return -EINVAL; - } - - if ((encoding & 0x07) == 0x00) - encoding |= DW_EH_PE_udata4; - - switch (encoding & DW_EH_PE_FORMAT_MASK) { - case DW_EH_PE_sdata4: - *val += dw_read(cur, s32, end); - break; - case DW_EH_PE_udata4: - *val += dw_read(cur, u32, end); - break; - case DW_EH_PE_sdata8: - *val += dw_read(cur, s64, end); - break; - case DW_EH_PE_udata8: - *val += dw_read(cur, u64, end); - break; - default: - return -EINVAL; - } - - out: - *p = cur; - return 0; -} - -#define dw_read_encoded_value(ptr, end, enc) ({ \ - u64 __v; \ - if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ - return -EINVAL; \ - } \ - __v; \ - }) - -static u64 elf_section_offset(int fd, const char *name) -{ - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - u64 offset = 0; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - return 0; - - do { - if (gelf_getehdr(elf, &ehdr) == NULL) - break; - - if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL)) - break; - - offset = shdr.sh_offset; - } while (0); - - elf_end(elf); - return offset; + thread->unwind_libunwind_ops = ops; } -#ifndef NO_LIBUNWIND_DEBUG_FRAME -static int elf_is_exec(int fd, const char *name) +int unwind__prepare_access(struct thread *thread, struct map *map) { - Elf *elf; - GElf_Ehdr ehdr; - int retval = 0; + const char *arch; + enum dso_type dso_type; + struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops; - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) + if (thread->addr_space) { + pr_debug("unwind: thread map already set, dso=%s\n", + map->dso->name); return 0; - if (gelf_getehdr(elf, &ehdr) == NULL) - goto out; - - retval = (ehdr.e_type == ET_EXEC); - -out: - elf_end(elf); - pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval); - return retval; -} -#endif - -struct table_entry { - u32 start_ip_offset; - u32 fde_offset; -}; - -struct eh_frame_hdr { - 
unsigned char version; - unsigned char eh_frame_ptr_enc; - unsigned char fde_count_enc; - unsigned char table_enc; - - /* - * The rest of the header is variable-length and consists of the - * following members: - * - * encoded_t eh_frame_ptr; - * encoded_t fde_count; - */ - - /* A single encoded pointer should not be more than 8 bytes. */ - u64 enc[2]; - - /* - * struct { - * encoded_t start_ip; - * encoded_t fde_addr; - * } binary_search_table[fde_count]; - */ - char data[0]; -} __packed; - -static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, - u64 offset, u64 *table_data, u64 *segbase, - u64 *fde_count) -{ - struct eh_frame_hdr hdr; - u8 *enc = (u8 *) &hdr.enc; - u8 *end = (u8 *) &hdr.data; - ssize_t r; - - r = dso__data_read_offset(dso, machine, offset, - (u8 *) &hdr, sizeof(hdr)); - if (r != sizeof(hdr)) - return -EINVAL; - - /* We dont need eh_frame_ptr, just skip it. */ - dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); - - *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); - *segbase = offset; - *table_data = (enc - (u8 *) &hdr) + offset; - return 0; -} - -static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, - u64 *table_data, u64 *segbase, - u64 *fde_count) -{ - int ret = -EINVAL, fd; - u64 offset = dso->data.eh_frame_hdr_offset; - - if (offset == 0) { - fd = dso__data_get_fd(dso, machine); - if (fd < 0) - return -EINVAL; - - /* Check the .eh_frame section for unwinding info */ - offset = elf_section_offset(fd, ".eh_frame_hdr"); - dso->data.eh_frame_hdr_offset = offset; - dso__data_put_fd(dso); } - if (offset) - ret = unwind_spec_ehframe(dso, machine, offset, - table_data, segbase, - fde_count); + /* env->arch is NULL for live-mode (i.e. perf top) */ + if (!thread->mg->machine->env || !thread->mg->machine->env->arch) + goto out_register; - return ret; -} - -#ifndef NO_LIBUNWIND_DEBUG_FRAME -static int read_unwind_spec_debug_frame(struct dso *dso, - struct machine *machine, u64 *offset) -{ - int fd; - u64 ofs = dso->data.debug_frame_offset; - - if (ofs == 0) { - fd = dso__data_get_fd(dso, machine); - if (fd < 0) - return -EINVAL; - - /* Check the .debug_frame section for unwinding info */ - ofs = elf_section_offset(fd, ".debug_frame"); - dso->data.debug_frame_offset = ofs; - dso__data_put_fd(dso); - } - - *offset = ofs; - if (*offset) + dso_type = dso__type(map->dso, thread->mg->machine); + if (dso_type == DSO__TYPE_UNKNOWN) return 0; - return -EINVAL; -} -#endif - -static struct map *find_map(unw_word_t ip, struct unwind_info *ui) -{ - struct addr_location al; - - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - if (!al.map) { - /* - * We've seen cases (softice) where DWARF unwinder went - * through non executable mmaps, which we need to lookup - * in MAP__VARIABLE tree. 
- */ - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__VARIABLE, ip, &al); - } - return al.map; -} - -static int -find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, - int need_unwind_info, void *arg) -{ - struct unwind_info *ui = arg; - struct map *map; - unw_dyn_info_t di; - u64 table_data, segbase, fde_count; - int ret = -EINVAL; - - map = find_map(ip, ui); - if (!map || !map->dso) - return -EINVAL; - - pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); - - /* Check the .eh_frame section for unwinding info */ - if (!read_unwind_spec_eh_frame(map->dso, ui->machine, - &table_data, &segbase, &fde_count)) { - memset(&di, 0, sizeof(di)); - di.format = UNW_INFO_FORMAT_REMOTE_TABLE; - di.start_ip = map->start; - di.end_ip = map->end; - di.u.rti.segbase = map->start + segbase; - di.u.rti.table_data = map->start + table_data; - di.u.rti.table_len = fde_count * sizeof(struct table_entry) - / sizeof(unw_word_t); - ret = dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); - } - -#ifndef NO_LIBUNWIND_DEBUG_FRAME - /* Check the .debug_frame section for unwinding info */ - if (ret < 0 && - !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { - int fd = dso__data_get_fd(map->dso, ui->machine); - int is_exec = elf_is_exec(fd, map->dso->name); - unw_word_t base = is_exec ? 0 : map->start; - const char *symfile; - - if (fd >= 0) - dso__data_put_fd(map->dso); - - symfile = map->dso->symsrc_filename ?: map->dso->name; - - memset(&di, 0, sizeof(di)); - if (dwarf_find_debug_frame(0, &di, ip, base, symfile, - map->start, map->end)) - return dwarf_search_unwind_table(as, ip, &di, pi, - need_unwind_info, arg); - } -#endif - - return ret; -} - -static int access_fpreg(unw_addr_space_t __maybe_unused as, - unw_regnum_t __maybe_unused num, - unw_fpreg_t __maybe_unused *val, - int __maybe_unused __write, - void __maybe_unused *arg) -{ - pr_err("unwind: access_fpreg unsupported\n"); - return -UNW_EINVAL; -} - -static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as, - unw_word_t __maybe_unused *dil_addr, - void __maybe_unused *arg) -{ - return -UNW_ENOINFO; -} - -static int resume(unw_addr_space_t __maybe_unused as, - unw_cursor_t __maybe_unused *cu, - void __maybe_unused *arg) -{ - pr_err("unwind: resume unsupported\n"); - return -UNW_EINVAL; -} + arch = normalize_arch(thread->mg->machine->env->arch); -static int -get_proc_name(unw_addr_space_t __maybe_unused as, - unw_word_t __maybe_unused addr, - char __maybe_unused *bufp, size_t __maybe_unused buf_len, - unw_word_t __maybe_unused *offp, void __maybe_unused *arg) -{ - pr_err("unwind: get_proc_name unsupported\n"); - return -UNW_EINVAL; -} - -static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, - unw_word_t *data) -{ - struct map *map; - ssize_t size; - - map = find_map(addr, ui); - if (!map) { - pr_debug("unwind: no map for %lx\n", (unsigned long)addr); - return -1; + if (!strcmp(arch, "x86")) { + if (dso_type != DSO__TYPE_64BIT) + ops = x86_32_unwind_libunwind_ops; + } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) { + if (dso_type == DSO__TYPE_64BIT) + ops = arm64_unwind_libunwind_ops; } - if (!map->dso) + if (!ops) { + pr_err("unwind: target platform=%s is not supported\n", arch); return -1; - - size = dso__data_read_addr(map->dso, map, ui->machine, - addr, (u8 *) data, sizeof(*data)); - - return !(size == sizeof(*data)); -} - -static int access_mem(unw_addr_space_t __maybe_unused as, - unw_word_t addr, unw_word_t *valp, - int __write, void *arg) -{ 
- struct unwind_info *ui = arg; - struct stack_dump *stack = &ui->sample->user_stack; - u64 start, end; - int offset; - int ret; - - /* Don't support write, probably not needed. */ - if (__write || !stack || !ui->sample->user_regs.regs) { - *valp = 0; - return 0; - } - - ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP); - if (ret) - return ret; - - end = start + stack->size; - - /* Check overflow. */ - if (addr + sizeof(unw_word_t) < addr) - return -EINVAL; - - if (addr < start || addr + sizeof(unw_word_t) >= end) { - ret = access_dso_mem(ui, addr, valp); - if (ret) { - pr_debug("unwind: access_mem %p not inside range" - " 0x%" PRIx64 "-0x%" PRIx64 "\n", - (void *) (uintptr_t) addr, start, end); - *valp = 0; - return ret; - } - return 0; - } - - offset = addr - start; - *valp = *(unw_word_t *)&stack->data[offset]; - pr_debug("unwind: access_mem addr %p val %lx, offset %d\n", - (void *) (uintptr_t) addr, (unsigned long)*valp, offset); - return 0; -} - -static int access_reg(unw_addr_space_t __maybe_unused as, - unw_regnum_t regnum, unw_word_t *valp, - int __write, void *arg) -{ - struct unwind_info *ui = arg; - int id, ret; - u64 val; - - /* Don't support write, I suspect we don't need it. */ - if (__write) { - pr_err("unwind: access_reg w %d\n", regnum); - return 0; - } - - if (!ui->sample->user_regs.regs) { - *valp = 0; - return 0; - } - - id = libunwind__arch_reg_id(regnum); - if (id < 0) - return -EINVAL; - - ret = perf_reg_value(&val, &ui->sample->user_regs, id); - if (ret) { - pr_err("unwind: can't read reg %d\n", regnum); - return ret; - } - - *valp = (unw_word_t) val; - pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp); - return 0; -} - -static void put_unwind_info(unw_addr_space_t __maybe_unused as, - unw_proc_info_t *pi __maybe_unused, - void *arg __maybe_unused) -{ - pr_debug("unwind: put_unwind_info called\n"); -} - -static int entry(u64 ip, struct thread *thread, - unwind_entry_cb_t cb, void *arg) -{ - struct unwind_entry e; - struct addr_location al; - - thread__find_addr_location(thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - - e.ip = ip; - e.map = al.map; - e.sym = al.sym; - - pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", - al.sym ? al.sym->name : "''", - ip, - al.map ? 
al.map->map_ip(al.map, ip) : (u64) 0); - - return cb(&e, arg); -} - -static void display_error(int err) -{ - switch (err) { - case UNW_EINVAL: - pr_err("unwind: Only supports local.\n"); - break; - case UNW_EUNSPEC: - pr_err("unwind: Unspecified error.\n"); - break; - case UNW_EBADREG: - pr_err("unwind: Register unavailable.\n"); - break; - default: - break; - } -} - -static unw_accessors_t accessors = { - .find_proc_info = find_proc_info, - .put_unwind_info = put_unwind_info, - .get_dyn_info_list_addr = get_dyn_info_list_addr, - .access_mem = access_mem, - .access_reg = access_reg, - .access_fpreg = access_fpreg, - .resume = resume, - .get_proc_name = get_proc_name, -}; - -int unwind__prepare_access(struct thread *thread) -{ - if (callchain_param.record_mode != CALLCHAIN_DWARF) - return 0; - - thread->addr_space = unw_create_addr_space(&accessors, 0); - if (!thread->addr_space) { - pr_err("unwind: Can't create unwind address space.\n"); - return -ENOMEM; } +out_register: + unwind__register_ops(thread, ops); - unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL); - return 0; + return thread->unwind_libunwind_ops->prepare_access(thread); } void unwind__flush_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) - return; - - unw_flush_cache(thread->addr_space, 0, 0); + if (thread->unwind_libunwind_ops) + thread->unwind_libunwind_ops->flush_access(thread); } void unwind__finish_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) - return; - - unw_destroy_addr_space(thread->addr_space); -} - -static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, - void *arg, int max_stack) -{ - u64 val; - unw_word_t ips[max_stack]; - unw_addr_space_t addr_space; - unw_cursor_t c; - int ret, i = 0; - - ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP); - if (ret) - return ret; - - ips[i++] = (unw_word_t) val; - - /* - * If we need more than one entry, do the DWARF - * unwind itself. - */ - if (max_stack - 1 > 0) { - WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL"); - addr_space = ui->thread->addr_space; - - if (addr_space == NULL) - return -1; - - ret = unw_init_remote(&c, addr_space, ui); - if (ret) - display_error(ret); - - while (!ret && (unw_step(&c) > 0) && i < max_stack) { - unw_get_reg(&c, UNW_REG_IP, &ips[i]); - ++i; - } - - max_stack = i; - } - - /* - * Display what we got based on the order setup. - */ - for (i = 0; i < max_stack && !ret; i++) { - int j = i; - - if (callchain_param.order == ORDER_CALLER) - j = max_stack - i - 1; - ret = ips[j] ? 
entry(ips[j], ui->thread, cb, arg) : 0; - } - - return ret; + if (thread->unwind_libunwind_ops) + thread->unwind_libunwind_ops->finish_access(thread); } int unwind__get_entries(unwind_entry_cb_t cb, void *arg, - struct thread *thread, - struct perf_sample *data, int max_stack) + struct thread *thread, + struct perf_sample *data, int max_stack) { - struct unwind_info ui = { - .sample = data, - .thread = thread, - .machine = thread->mg->machine, - }; - - if (!data->user_regs.regs) - return -EINVAL; - - if (max_stack <= 0) - return -EINVAL; - - return get_entries(&ui, cb, arg, max_stack); + if (thread->unwind_libunwind_ops) + return thread->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack); + return 0; } diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h index 12790cf94618..b07466240346 100644 --- a/tools/perf/util/unwind.h +++ b/tools/perf/util/unwind.h @@ -14,18 +14,31 @@ struct unwind_entry { typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); +struct unwind_libunwind_ops { + int (*prepare_access)(struct thread *thread); + void (*flush_access)(struct thread *thread); + void (*finish_access)(struct thread *thread); + int (*get_entries)(unwind_entry_cb_t cb, void *arg, + struct thread *thread, + struct perf_sample *data, int max_stack); +}; + #ifdef HAVE_DWARF_UNWIND_SUPPORT int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, int max_stack); /* libunwind specific */ #ifdef HAVE_LIBUNWIND_SUPPORT -int libunwind__arch_reg_id(int regnum); -int unwind__prepare_access(struct thread *thread); +#ifndef LIBUNWIND__ARCH_REG_ID +#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum) +#endif +int LIBUNWIND__ARCH_REG_ID(int regnum); +int unwind__prepare_access(struct thread *thread, struct map *map); void unwind__flush_access(struct thread *thread); void unwind__finish_access(struct thread *thread); #else -static inline int unwind__prepare_access(struct thread *thread __maybe_unused) +static inline int unwind__prepare_access(struct thread *thread __maybe_unused, + struct map *map __maybe_unused) { return 0; } @@ -44,7 +57,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused, return 0; } -static inline int unwind__prepare_access(struct thread *thread __maybe_unused) +static inline int unwind__prepare_access(struct thread *thread __maybe_unused, + struct map *map __maybe_unused) { return 0; }
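One detail of the refactoring above worth spelling out: the per-arch ops pointers are declared __weak and left NULL in unwind-libunwind.c, and an arch wrapper object, when it is built at all, provides a strong definition that overrides them; unwind__prepare_access() then attaches the right ops table to each thread based on the target's arch and DSO bitness. A minimal standalone sketch of the weak-symbol half of that mechanism (GCC/Clang attribute; the names are illustrative, not from the patch):

#include <stdio.h>

struct demo_unwind_ops {
	const char *name;
};

/* Weak default: stays NULL unless some other object file provides a
 * strong definition, which is how x86_32/arm64_unwind_libunwind_ops
 * behave in the patch above. */
struct demo_unwind_ops * __attribute__((weak)) demo_x86_32_ops;

int main(void)
{
	printf("x86_32 ops %s\n", demo_x86_32_ops ? "linked in" : "absent");
	return 0;
}

Linking in a second object that strongly defines demo_x86_32_ops would flip the output, which is exactly how the new libunwind/x86_32.c and libunwind/arm64.c wrappers register themselves when REMOTE_UNWIND_LIBUNWIND support is built.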