diff options
167 files changed, 6201 insertions, 1141 deletions
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index a93b414672a7..f886fbb1ad05 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -58,6 +58,8 @@ show up in /proc/sys/kernel: - panic_on_stackoverflow - panic_on_unrecovered_nmi - panic_on_warn +- perf_cpu_time_max_percent +- perf_event_paranoid - pid_max - powersave-nap [ PPC only ] - printk @@ -639,6 +641,17 @@ allowed to execute. ============================================================== +perf_event_paranoid: + +Controls use of the performance events system by unprivileged +users (without CAP_SYS_ADMIN). The default value is 1. + + -1: Allow use of (almost) all events by all users +>=0: Disallow raw tracepoint access by users without CAP_SYS_ADMIN +>=1: Disallow CPU event access by users without CAP_SYS_ADMIN +>=2: Disallow kernel profiling by users without CAP_SYS_ADMIN + +============================================================== pid_max: diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 1538562cc720..eb3abf8ac44e 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,6 +1,7 @@ - obj-y += entry/ +obj-$(CONFIG_PERF_EVENTS) += events/ + obj-$(CONFIG_KVM) += kvm/ # Xen paravirtualization support diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile new file mode 100644 index 000000000000..fdfea1511cc0 --- /dev/null +++ b/arch/x86/events/Makefile @@ -0,0 +1,13 @@ +obj-y += core.o + +obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o +obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o +endif +obj-$(CONFIG_CPU_SUP_INTEL) += intel/core.o intel/bts.o intel/cqm.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/cstate.o intel/ds.o intel/knc.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/lbr.o intel/p4.o intel/p6.o intel/pt.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel/rapl.o msr.o +obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore.o intel/uncore_nhmex.o 
+obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel/uncore_snb.o intel/uncore_snbep.o diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/events/amd/core.c index 58610539b048..049ada8d4e9c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/events/amd/core.c @@ -5,7 +5,7 @@ #include <linux/slab.h> #include <asm/apicdef.h> -#include "perf_event.h" +#include "../perf_event.h" static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/events/amd/ibs.c index 989d3c215d2b..51087c29b2c2 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -14,7 +14,7 @@ #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" static u32 ibs_caps; @@ -670,7 +670,7 @@ static __init int perf_event_ibs_init(void) perf_ibs_pmu_init(&perf_ibs_op, "ibs_op"); register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs"); - printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps); + pr_info("perf: AMD IBS detected (0x%08x)\n", ibs_caps); return 0; } @@ -774,14 +774,14 @@ static int setup_ibs_ctl(int ibs_eilvt_off) pci_read_config_dword(cpu_cfg, IBSCTL, &value); if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) { pci_dev_put(cpu_cfg); - printk(KERN_DEBUG "Failed to setup IBS LVT offset, " - "IBSCTL = 0x%08x\n", value); + pr_debug("Failed to setup IBS LVT offset, IBSCTL = 0x%08x\n", + value); return -EINVAL; } } while (1); if (!nodes) { - printk(KERN_DEBUG "No CPU node configured for IBS\n"); + pr_debug("No CPU node configured for IBS\n"); return -ENODEV; } @@ -810,7 +810,7 @@ static void force_ibs_eilvt_setup(void) preempt_enable(); if (offset == APIC_EILVT_NR_MAX) { - printk(KERN_DEBUG "No EILVT entry available\n"); + pr_debug("No EILVT entry available\n"); return; } diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.c b/arch/x86/events/amd/iommu.c index 97242a9242bd..635e5eba0caf 100644 --- 
a/arch/x86/kernel/cpu/perf_event_amd_iommu.c +++ b/arch/x86/events/amd/iommu.c @@ -16,8 +16,8 @@ #include <linux/cpumask.h> #include <linux/slab.h> -#include "perf_event.h" -#include "perf_event_amd_iommu.h" +#include "../perf_event.h" +#include "iommu.h" #define COUNTER_SHIFT 16 diff --git a/arch/x86/kernel/cpu/perf_event_amd_iommu.h b/arch/x86/events/amd/iommu.h index 845d173278e3..845d173278e3 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_iommu.h +++ b/arch/x86/events/amd/iommu.h diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/events/amd/uncore.c index 8836fc9fa84b..3db9569e658c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -538,7 +538,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_nb; - printk(KERN_INFO "perf: AMD NB counters detected\n"); + pr_info("perf: AMD NB counters detected\n"); ret = 0; } @@ -552,7 +552,7 @@ static int __init amd_uncore_init(void) if (ret) goto fail_l2; - printk(KERN_INFO "perf: AMD L2I counters detected\n"); + pr_info("perf: AMD L2I counters detected\n"); ret = 0; } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/events/core.c index 1b443db2db50..7402c8182813 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/events/core.c @@ -254,15 +254,16 @@ static bool check_hw_exists(void) * We still allow the PMU driver to operate: */ if (bios_fail) { - printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail); + pr_cont("Broken BIOS detected, complain to your hardware vendor.\n"); + pr_err(FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", + reg_fail, val_fail); } return true; msr_fail: - printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n"); - printk("%sFailed to access perfctr msr (MSR %x is %Lx)\n", + pr_cont("Broken PMU hardware detected, using software events 
only.\n"); + pr_info("%sFailed to access perfctr msr (MSR %x is %Lx)\n", boot_cpu_has(X86_FEATURE_HYPERVISOR) ? KERN_INFO : KERN_ERR, reg, val_new); diff --git a/arch/x86/kernel/cpu/perf_event_intel_bts.c b/arch/x86/events/intel/bts.c index 2cad71d1b14c..b99dc9258c0f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_bts.c +++ b/arch/x86/events/intel/bts.c @@ -26,7 +26,7 @@ #include <asm-generic/sizes.h> #include <asm/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" struct bts_ctx { struct perf_output_handle handle; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/events/intel/core.c index fed2ab1f1065..a7ec685657a5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/events/intel/core.c @@ -18,7 +18,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Intel PerfMon, used on Core and later. diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/events/intel/cqm.c index a316ca96f1b6..1b064c430140 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/events/intel/cqm.c @@ -7,7 +7,7 @@ #include <linux/perf_event.h> #include <linux/slab.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define MSR_IA32_PQR_ASSOC 0x0c8f #define MSR_IA32_QM_CTR 0x0c8e diff --git a/arch/x86/kernel/cpu/perf_event_intel_cstate.c b/arch/x86/events/intel/cstate.c index 75a38b5a2e26..7946c4231169 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -89,7 +89,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/events/intel/ds.c index 10602f0a438f..c8a243d6fc82 100644 --- 
a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/events/intel/ds.c @@ -5,7 +5,7 @@ #include <asm/perf_event.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" /* The size of a BTS record in bytes: */ #define BTS_RECORD_SIZE 24 @@ -1325,13 +1325,13 @@ void __init intel_ds_init(void) switch (format) { case 0: - printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); + pr_cont("PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; break; case 1: - printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); + pr_cont("PEBS fmt1%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; @@ -1351,7 +1351,7 @@ void __init intel_ds_init(void) break; default: - printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); + pr_cont("no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; } } diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/events/intel/knc.c index 5b0c232d1ee6..206226e08f49 100644 --- a/arch/x86/kernel/cpu/perf_event_knc.c +++ b/arch/x86/events/intel/knc.c @@ -5,7 +5,7 @@ #include <asm/hardirq.h> -#include "perf_event.h" +#include "../perf_event.h" static const u64 knc_perfmon_event_map[] = { diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/events/intel/lbr.c index 653f88d25987..69dd11887dd1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -5,7 +5,7 @@ #include <asm/msr.h> #include <asm/insn.h> -#include "perf_event.h" +#include "../perf_event.h" enum { LBR_FORMAT_32 = 0x00, diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/events/intel/p4.c index f2e56783af3d..0a5ede187d9c 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/events/intel/p4.c @@ -13,7 +13,7 @@ #include <asm/hardirq.h> #include <asm/apic.h> -#include "perf_event.h" +#include "../perf_event.h" #define P4_CNTR_LIMIT 3 /* diff --git 
a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/events/intel/p6.c index 7c1a0c07b607..1f5c47ab4c65 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/events/intel/p6.c @@ -1,7 +1,7 @@ #include <linux/perf_event.h> #include <linux/types.h> -#include "perf_event.h" +#include "../perf_event.h" /* * Not sure about some of these diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/events/intel/pt.c index c0bbd1033b7c..6af7cf71d6b2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/events/intel/pt.c @@ -29,8 +29,8 @@ #include <asm/io.h> #include <asm/intel_pt.h> -#include "perf_event.h" -#include "intel_pt.h" +#include "../perf_event.h" +#include "pt.h" static DEFINE_PER_CPU(struct pt, pt_ctx); diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/events/intel/pt.h index 336878a5d205..336878a5d205 100644 --- a/arch/x86/kernel/cpu/intel_pt.h +++ b/arch/x86/events/intel/pt.h diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/events/intel/rapl.c index 24a351ad628d..580f504ec27c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -48,7 +48,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <asm/cpu_device_id.h> -#include "perf_event.h" +#include "../perf_event.h" /* * RAPL energy status counters diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/events/intel/uncore.c index 3bf41d413775..91a18d6c4405 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1,4 +1,4 @@ -#include "perf_event_intel_uncore.h" +#include "uncore.h" static struct intel_uncore_type *empty_uncore[] = { NULL, }; struct intel_uncore_type **uncore_msr_uncores = empty_uncore; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/events/intel/uncore.h index a7086b862156..6a1340c7f3cc 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -2,7 +2,7 @@ #include 
<linux/slab.h> #include <linux/pci.h> #include <linux/perf_event.h> -#include "perf_event.h" +#include "../perf_event.h" #define UNCORE_PMU_NAME_LEN 32 #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 2749965afed0..e89bf5c536e0 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -1,5 +1,5 @@ /* Nehalem-EX/Westmere-EX uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* NHM-EX event control */ #define NHMEX_PMON_CTL_EV_SEL_MASK 0x000000ff diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 2bd030ddd0db..2049d26a7ae6 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,5 +1,5 @@ /* Nehalem/SandBridge/Haswell uncore support */ -#include "perf_event_intel_uncore.h" +#include "uncore.h" /* Uncore IMC PCI IDs */ #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 33acb884ccf1..0c801f77c03d 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1,6 +1,5 @@ /* SandyBridge-EP/IvyTown uncore support */ -#include "perf_event_intel_uncore.h" - +#include "uncore.h" /* SNB-EP Box level control */ #define SNBEP_PMON_BOX_CTL_RST_CTRL (1 << 0) diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/events/msr.c index ec863b9a9f78..ec863b9a9f78 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/events/msr.c diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/events/perf_event.h index 7bb61e32fb29..7bb61e32fb29 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/events/perf_event.h diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1514753fd435..15340e36ddcb 
100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -256,7 +256,7 @@ extern int force_personality32; instruction set this CPU supports. This could be done in user space, but it's not easy, and we've already done it here. */ -#define ELF_HWCAP (boot_cpu_data.x86_capability[0]) +#define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX]) /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 58031303e304..7a60424d63fa 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -30,33 +30,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o - -ifdef CONFIG_PERF_EVENTS -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o -ifdef CONFIG_AMD_IOMMU -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o -endif -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_pt.o perf_event_intel_bts.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_cstate.o - -obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \ - perf_event_intel_uncore_snb.o \ - perf_event_intel_uncore_snbep.o \ - perf_event_intel_uncore_nhmex.o -obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_msr.o -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_msr.o -endif - - obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o 
hypervisor.o mshyperv.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a07956a08936..97c59fd60702 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) void (*f_vide)(void); u64 d, d2; - printk(KERN_INFO "AMD K6 stepping B detected - "); + pr_info("AMD K6 stepping B detected - "); /* * It looks like AMD fixed the 2.6.2 bug and improved indirect @@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c) d = d2-d; if (d > 20*K6_BUG_LOOP) - printk(KERN_CONT - "system stability may be impaired when more than 32 MB are used.\n"); + pr_cont("system stability may be impaired when more than 32 MB are used.\n"); else - printk(KERN_CONT "probably OK (after B9730xxxx).\n"); + pr_cont("probably OK (after B9730xxxx).\n"); } /* K6 with old style WHCR */ @@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n", + pr_info("Enabling old style K6 write allocation for %d Mb\n", mbytes); } return; @@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c) wbinvd(); wrmsr(MSR_K6_WHCR, l, h); local_irq_restore(flags); - printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n", + pr_info("Enabling new style K6 write allocation for %d Mb\n", mbytes); } @@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) */ if (c->x86_model >= 6 && c->x86_model <= 10) { if (!cpu_has(c, X86_FEATURE_XMM)) { - printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); + pr_info("Enabling disabled K7/SSE Support.\n"); msr_clear_bit(MSR_K7_HWCR, 15); set_cpu_cap(c, X86_FEATURE_XMM); } @@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c) if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { rdmsr(MSR_K7_CLK_CTL, l, h); if ((l & 0xfff00000) != 0x20000000) { - printk(KERN_INFO - "CPU: CLK_CTL 
MSR was %x. Reprogramming to %x\n", - l, ((l & 0x000fffff)|0x20000000)); + pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", + l, ((l & 0x000fffff)|0x20000000)); wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); } } @@ -485,7 +483,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { unsigned long pfn = tseg >> PAGE_SHIFT; - printk(KERN_DEBUG "tseg: %010llx\n", tseg); + pr_debug("tseg: %010llx\n", tseg); if (pfn_range_is_mapped(pfn, pfn + 1)) set_memory_4k((unsigned long)__va(tseg), 1); } @@ -500,8 +498,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_K7_HWCR, val); if (!(val & BIT(24))) - printk(KERN_WARNING FW_BUG "TSC doesn't count " - "with P0 frequency!\n"); + pr_warn(FW_BUG "TSC doesn't count with P0 frequency!\n"); } } diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 04f0fe5af83e..a972ac4c7e7d 100644 --- a/arch/x86/kernel/cpu/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c @@ -15,7 +15,7 @@ void __init check_bugs(void) { identify_boot_cpu(); #if !defined(CONFIG_SMP) - printk(KERN_INFO "CPU: "); + pr_info("CPU: "); print_cpu_info(&boot_cpu_data); #endif alternative_instructions(); diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index ae20be6e483c..ce197bb7c129 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -29,7 +29,7 @@ static void init_c3(struct cpuinfo_x86 *c) rdmsr(MSR_VIA_FCR, lo, hi); lo |= ACE_FCR; /* enable ACE unit */ wrmsr(MSR_VIA_FCR, lo, hi); - printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n"); + pr_info("CPU: Enabled ACE h/w crypto\n"); } /* enable RNG unit, if present and disabled */ @@ -37,7 +37,7 @@ static void init_c3(struct cpuinfo_x86 *c) rdmsr(MSR_VIA_RNG, lo, hi); lo |= RNG_ENABLE; /* enable RNG unit */ wrmsr(MSR_VIA_RNG, lo, hi); - printk(KERN_INFO "CPU: Enabled h/w RNG\n"); + pr_info("CPU: Enabled h/w RNG\n"); } /* store Centaur Extended Feature Flags as @@ -130,7 +130,7 @@ 
static void init_centaur(struct cpuinfo_x86 *c) name = "C6"; fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK; fcr_clr = DPDC; - printk(KERN_NOTICE "Disabling bugged TSC.\n"); + pr_notice("Disabling bugged TSC.\n"); clear_cpu_cap(c, X86_FEATURE_TSC); break; case 8: @@ -163,11 +163,11 @@ static void init_centaur(struct cpuinfo_x86 *c) newlo = (lo|fcr_set) & (~fcr_clr); if (newlo != lo) { - printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", + pr_info("Centaur FCR was 0x%X now 0x%X\n", lo, newlo); wrmsr(MSR_IDT_FCR1, newlo, hi); } else { - printk(KERN_INFO "Centaur FCR is 0x%X\n", lo); + pr_info("Centaur FCR is 0x%X\n", lo); } /* Emulate MTRRs using Centaur's MCR. */ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 37830de8f60a..68a80e9b67fc 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -228,7 +228,7 @@ static void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) lo |= 0x200000; wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); - printk(KERN_NOTICE "CPU serial number disabled.\n"); + pr_notice("CPU serial number disabled.\n"); clear_cpu_cap(c, X86_FEATURE_PN); /* Disabling the serial number may affect the cpuid level */ @@ -329,9 +329,8 @@ static void filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) if (!warn) continue; - printk(KERN_WARNING - "CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", - x86_cap_flag(df->feature), df->level); + pr_warn("CPU: CPU feature " X86_CAP_FMT " disabled, no CPUID level 0x%x\n", + x86_cap_flag(df->feature), df->level); } } @@ -510,7 +509,7 @@ void detect_ht(struct cpuinfo_x86 *c) smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { - printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); + pr_info_once("CPU0: Hyper-Threading is disabled\n"); goto out; } @@ -531,10 +530,10 @@ void detect_ht(struct cpuinfo_x86 *c) out: if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { - printk(KERN_INFO 
"CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); + pr_info("CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + pr_info("CPU: Processor Core ID: %d\n", + c->cpu_core_id); printed = 1; } #endif @@ -559,9 +558,8 @@ static void get_cpu_vendor(struct cpuinfo_x86 *c) } } - printk_once(KERN_ERR - "CPU: vendor_id '%s' unknown, using generic init.\n" \ - "CPU: Your system may be unstable.\n", v); + pr_err_once("CPU: vendor_id '%s' unknown, using generic init.\n" \ + "CPU: Your system may be unstable.\n", v); c->x86_vendor = X86_VENDOR_UNKNOWN; this_cpu = &default_cpu; @@ -760,7 +758,7 @@ void __init early_cpu_init(void) int count = 0; #ifdef CONFIG_PROCESSOR_SELECT - printk(KERN_INFO "KERNEL supported cpus:\n"); + pr_info("KERNEL supported cpus:\n"); #endif for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { @@ -778,7 +776,7 @@ void __init early_cpu_init(void) for (j = 0; j < 2; j++) { if (!cpudev->c_ident[j]) continue; - printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + pr_info(" %s %s\n", cpudev->c_vendor, cpudev->c_ident[j]); } } @@ -1061,7 +1059,7 @@ static void __print_cpu_msr(void) for (index = index_min; index < index_max; index++) { if (rdmsrl_safe(index, &val)) continue; - printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + pr_info(" MSR%08x: %016llx\n", index, val); } } } @@ -1100,19 +1098,19 @@ void print_cpu_info(struct cpuinfo_x86 *c) } if (vendor && !strstr(c->x86_model_id, vendor)) - printk(KERN_CONT "%s ", vendor); + pr_cont("%s ", vendor); if (c->x86_model_id[0]) - printk(KERN_CONT "%s", c->x86_model_id); + pr_cont("%s", c->x86_model_id); else - printk(KERN_CONT "%d86", c->x86); + pr_cont("%d86", c->x86); - printk(KERN_CONT " (family: 0x%x, model: 0x%x", c->x86, c->x86_model); + pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model); if (c->x86_mask || c->cpuid_level >= 0) - printk(KERN_CONT ", stepping: 0x%x)\n", c->x86_mask); + pr_cont(", stepping: 
0x%x)\n", c->x86_mask); else - printk(KERN_CONT ")\n"); + pr_cont(")\n"); print_cpu_msr(c); } @@ -1438,7 +1436,7 @@ void cpu_init(void) show_ucode_info_early(); - printk(KERN_INFO "Initializing CPU#%d\n", cpu); + pr_info("Initializing CPU#%d\n", cpu); if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index aaf152e79637..187bb583d0df 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -103,7 +103,7 @@ static void check_cx686_slop(struct cpuinfo_x86 *c) local_irq_restore(flags); if (ccr5 & 2) { /* possible wrong calibration done */ - printk(KERN_INFO "Recalibrating delay loop with SLOP bit reset\n"); + pr_info("Recalibrating delay loop with SLOP bit reset\n"); calibrate_delay(); c->loops_per_jiffy = loops_per_jiffy; } @@ -115,7 +115,7 @@ static void set_cx86_reorder(void) { u8 ccr3; - printk(KERN_INFO "Enable Memory access reorder on Cyrix/NSC processor.\n"); + pr_info("Enable Memory access reorder on Cyrix/NSC processor.\n"); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ @@ -128,7 +128,7 @@ static void set_cx86_reorder(void) static void set_cx86_memwb(void) { - printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); + pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); @@ -268,7 +268,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) * VSA1 we work around however. */ - printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); + pr_info("Working around Cyrix MediaGX virtual DMA bugs.\n"); isa_dma_bridge_buggy = 2; /* We do this before the PCI layer is running. 
However we @@ -426,7 +426,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c) if (dir0 == 5 || dir0 == 3) { unsigned char ccr3; unsigned long flags; - printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); + pr_info("Enabling CPUID on Cyrix processor.\n"); local_irq_save(flags); ccr3 = getCx86(CX86_CCR3); /* enable MAPEN */ diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index d820d8eae96b..73d391ae452f 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -56,7 +56,7 @@ detect_hypervisor_vendor(void) } if (max_pri) - printk(KERN_INFO "Hypervisor detected: %s\n", x86_hyper->name); + pr_info("Hypervisor detected: %s\n", x86_hyper->name); } void init_hypervisor(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 565648bc1a0a..05b9211ea0f7 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -61,7 +61,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) */ if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 && c->microcode < 0x20e) { - printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); + pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); } @@ -140,7 +140,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { - printk(KERN_INFO "Disabled fast string operations\n"); + pr_info("Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); } @@ -176,7 +176,7 @@ int ppro_with_ram_bug(void) boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask < 8) { - printk(KERN_INFO "Pentium Pro with Errata#50 detected. Taking evasive action.\n"); + pr_info("Pentium Pro with Errata#50 detected. 
Taking evasive action.\n"); return 1; } return 0; @@ -225,7 +225,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_F00F); if (!f00f_workaround_enabled) { - printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n"); + pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } } @@ -244,7 +244,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c) * Forcefully enable PAE if kernel parameter "forcepae" is present. */ if (forcepae) { - printk(KERN_WARNING "PAE forced!\n"); + pr_warn("PAE forced!\n"); set_cpu_cap(c, X86_FEATURE_PAE); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0b6c52388cf4..6ed779efff26 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -444,7 +444,7 @@ static ssize_t store_cache_disable(struct cacheinfo *this_leaf, err = amd_set_l3_disable_slot(nb, cpu, slot, val); if (err) { if (err == -EEXIST) - pr_warning("L3 slot %d in use/index already disabled!\n", + pr_warn("L3 slot %d in use/index already disabled!\n", slot); return err; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 4cfba4371a71..517619ea6498 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -115,7 +115,7 @@ static int raise_local(void) int cpu = m->extcpu; if (m->inject_flags & MCJ_EXCEPTION) { - printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); + pr_info("Triggering MCE exception on CPU %d\n", cpu); switch (context) { case MCJ_CTX_IRQ: /* @@ -128,15 +128,15 @@ static int raise_local(void) raise_exception(m, NULL); break; default: - printk(KERN_INFO "Invalid MCE context\n"); + pr_info("Invalid MCE context\n"); ret = -EINVAL; } - printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); + pr_info("MCE exception done on CPU %d\n", 
cpu); } else if (m->status) { - printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); + pr_info("Starting machine check poll CPU %d\n", cpu); raise_poll(m); mce_notify_irq(); - printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); + pr_info("Machine check poll done on CPU %d\n", cpu); } else m->finished = 0; @@ -183,8 +183,7 @@ static void raise_mce(struct mce *m) start = jiffies; while (!cpumask_empty(mce_inject_cpumask)) { if (!time_before(jiffies, start + 2*HZ)) { - printk(KERN_ERR - "Timeout waiting for mce inject %lx\n", + pr_err("Timeout waiting for mce inject %lx\n", *cpumask_bits(mce_inject_cpumask)); break; } @@ -241,7 +240,7 @@ static int inject_init(void) { if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) return -ENOMEM; - printk(KERN_INFO "Machine check injector initialized\n"); + pr_info("Machine check injector initialized\n"); register_mce_write_callback(mce_write); register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify"); diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 12402e10aeff..2a0717bf8033 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -26,14 +26,12 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); - printk(KERN_EMERG - "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", - smp_processor_id(), loaddr, lotype); + pr_emerg("CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", + smp_processor_id(), loaddr, lotype); if (lotype & (1<<5)) { - printk(KERN_EMERG - "CPU#%d: Possible thermal failure (CPU on fire ?).\n", - smp_processor_id()); + pr_emerg("CPU#%d: Possible thermal failure (CPU on fire ?).\n", + smp_processor_id()); } add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); @@ -61,12 +59,10 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) /* Read registers before enabling: */ rdmsr(MSR_IA32_P5_MC_ADDR, l, h); 
rdmsr(MSR_IA32_P5_MC_TYPE, l, h); - printk(KERN_INFO - "Intel old style machine check architecture supported.\n"); + pr_info("Intel old style machine check architecture supported.\n"); /* Enable MCE: */ cr4_set_bits(X86_CR4_MCE); - printk(KERN_INFO - "Intel old style machine check reporting enabled on CPU#%d.\n", - smp_processor_id()); + pr_info("Intel old style machine check reporting enabled on CPU#%d.\n", + smp_processor_id()); } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 2c5aaf8c2e2f..0b445c2ff735 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -190,7 +190,7 @@ static int therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); @@ -198,8 +198,7 @@ static int therm_throt_process(bool new_event, int event, int level) } if (old_event) { if (event == THERMAL_THROTTLING_EVENT) - printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", - this_cpu, + pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? 
"Core" : "Package"); return 1; } @@ -417,8 +416,8 @@ static void intel_thermal_interrupt(void) static void unexpected_thermal_interrupt(void) { - printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n", - smp_processor_id()); + pr_err("CPU%d: Unexpected LVT thermal interrupt!\n", + smp_processor_id()); } static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; @@ -499,7 +498,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c) if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { if (system_state == SYSTEM_BOOTING) - printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", cpu); + pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu); return; } @@ -557,8 +556,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", - tm2 ? "TM2" : "TM1"); + pr_info_once("CPU0: Thermal monitoring enabled (%s)\n", + tm2 ? "TM2" : "TM1"); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 7245980186ee..fcf9ae9384f4 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -12,8 +12,8 @@ static void default_threshold_interrupt(void) { - printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", - THRESHOLD_APIC_VECTOR); + pr_err("Unexpected threshold interrupt at vector %x\n", + THRESHOLD_APIC_VECTOR); } void (*mce_threshold_vector)(void) = default_threshold_interrupt; diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 01dd8702880b..c6a722e1d011 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -17,7 +17,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) { ist_enter(regs); - printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); + 
pr_emerg("CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); ist_exit(regs); @@ -39,6 +39,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c) cr4_set_bits(X86_CR4_MCE); - printk(KERN_INFO - "Winchip machine check reporting enabled on CPU#0.\n"); + pr_info("Winchip machine check reporting enabled on CPU#0.\n"); } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 2233f8a76615..75d3aab5f7b2 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -953,7 +953,7 @@ struct microcode_ops * __init init_amd_microcode(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("AMD CPU family 0x%x not supported\n", c->x86); + pr_warn("AMD CPU family 0x%x not supported\n", c->x86); return NULL; } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 20e242ea1bc4..4e7c6933691c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -161,8 +161,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES); ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); - printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", - ms_hyperv.features, ms_hyperv.hints); + pr_info("HyperV: features 0x%x, hints 0x%x\n", + ms_hyperv.features, ms_hyperv.hints); #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) { @@ -174,8 +174,8 @@ static void __init ms_hyperv_init_platform(void) rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); lapic_timer_frequency = hv_lapic_frequency; - printk(KERN_INFO "HyperV: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + pr_info("HyperV: LAPIC Timer Frequency: %#x\n", + lapic_timer_frequency); } #endif diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c 
b/arch/x86/kernel/cpu/mtrr/centaur.c index 316fe3e60a97..3d689937fc1b 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -103,7 +103,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t */ if (type != MTRR_TYPE_WRCOMB && (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { - pr_warning("mtrr: only write-combining%s supported\n", + pr_warn("mtrr: only write-combining%s supported\n", centaur_mcr_type ? " and uncacheable are" : " is"); return -EINVAL; } diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0d98503c2245..31e951ce6dff 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -57,9 +57,9 @@ static int __initdata nr_range; static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; -#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) +#define Dprintk(x...) do { if (debug_print) pr_debug(x); } while (0) -#define BIOS_BUG_MSG KERN_WARNING \ +#define BIOS_BUG_MSG \ "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" static int __init @@ -81,9 +81,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, base, base + size); } if (debug_print) { - printk(KERN_DEBUG "After WB checking\n"); + pr_debug("After WB checking\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } @@ -101,7 +101,7 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, (mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED) && (mtrr_state.enabled & MTRR_STATE_MTRR_FIXED_ENABLED)) { /* Var MTRR contains UC entry below 1M? 
Skip it: */ - printk(BIOS_BUG_MSG, i); + pr_warn(BIOS_BUG_MSG, i); if (base + size <= (1<<(20-PAGE_SHIFT))) continue; size -= (1<<(20-PAGE_SHIFT)) - base; @@ -114,11 +114,11 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, extra_remove_base + extra_remove_size); if (debug_print) { - printk(KERN_DEBUG "After UC checking\n"); + pr_debug("After UC checking\n"); for (i = 0; i < RANGE_NUM; i++) { if (!range[i].end) continue; - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } } @@ -126,9 +126,9 @@ x86_get_mtrr_mem_range(struct range *range, int nr_range, /* sort the ranges */ nr_range = clean_sort_range(range, RANGE_NUM); if (debug_print) { - printk(KERN_DEBUG "After sorting\n"); + pr_debug("After sorting\n"); for (i = 0; i < nr_range; i++) - printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", + pr_debug("MTRR MAP PFN: %016llx - %016llx\n", range[i].start, range[i].end); } @@ -544,7 +544,7 @@ static void __init print_out_mtrr_range_state(void) start_base = to_size_factor(start_base, &start_factor), type = range_state[i].type; - printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n", i, start_base, start_factor, size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE) ? 
"UC" : @@ -713,7 +713,7 @@ int __init mtrr_cleanup(unsigned address_bits) return 0; /* Print original var MTRRs at first, for debugging: */ - printk(KERN_DEBUG "original variable MTRRs\n"); + pr_debug("original variable MTRRs\n"); print_out_mtrr_range_state(); memset(range, 0, sizeof(range)); @@ -733,7 +733,7 @@ int __init mtrr_cleanup(unsigned address_bits) x_remove_base, x_remove_size); range_sums = sum_ranges(range, nr_range); - printk(KERN_INFO "total RAM covered: %ldM\n", + pr_info("total RAM covered: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { @@ -745,12 +745,11 @@ int __init mtrr_cleanup(unsigned address_bits) if (!result[i].bad) { set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); + pr_debug("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } - printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " - "will find optimal one\n"); + pr_info("invalid mtrr_gran_size or mtrr_chunk_size, will find optimal one\n"); } i = 0; @@ -768,7 +767,7 @@ int __init mtrr_cleanup(unsigned address_bits) x_remove_base, x_remove_size, i); if (debug_print) { mtrr_print_out_one_result(i); - printk(KERN_INFO "\n"); + pr_info("\n"); } i++; @@ -779,7 +778,7 @@ int __init mtrr_cleanup(unsigned address_bits) index_good = mtrr_search_optimal_index(); if (index_good != -1) { - printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); + pr_info("Found optimal setting for mtrr clean up\n"); i = index_good; mtrr_print_out_one_result(i); @@ -790,7 +789,7 @@ int __init mtrr_cleanup(unsigned address_bits) gran_size <<= 10; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); set_var_mtrr_all(address_bits); - printk(KERN_DEBUG "New variable MTRRs\n"); + pr_debug("New variable MTRRs\n"); print_out_mtrr_range_state(); return 1; } else { @@ -799,8 +798,8 @@ int __init mtrr_cleanup(unsigned address_bits) mtrr_print_out_one_result(i); } - printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); 
- printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); + pr_info("mtrr_cleanup: can not find optimal value\n"); + pr_info("please specify mtrr_gran_size/mtrr_chunk_size\n"); return 0; } @@ -918,7 +917,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ if (!highest_pfn) { - printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); + pr_info("CPU MTRRs all blank - virtualized system.\n"); return 0; } @@ -973,7 +972,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) end_pfn); if (total_trim_size) { - pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); + pr_warn("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", + total_trim_size >> 20); if (!changed_by_mtrr_cleanup) WARN_ON(1); diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index c870af161008..fcbcb2f678ca 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -55,7 +55,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) rdmsr(MSR_K8_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { - printk(KERN_ERR FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" + pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; @@ -501,14 +501,14 @@ void __init mtrr_state_warn(void) if (!mask) return; if (mask & MTRR_CHANGE_MASK_FIXED) - pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); + pr_warn("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_VARIABLE) - pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); + pr_warn("mtrr: your CPUs had inconsistent variable MTRR settings\n"); if (mask & MTRR_CHANGE_MASK_DEFTYPE) - pr_warning("mtrr: your CPUs had inconsistent 
MTRRdefType settings\n"); + pr_warn("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); - printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); - printk(KERN_INFO "mtrr: corrected configuration.\n"); + pr_info("mtrr: probably your BIOS does not setup all CPUs.\n"); + pr_info("mtrr: corrected configuration.\n"); } /* @@ -519,8 +519,7 @@ void __init mtrr_state_warn(void) void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) { if (wrmsr_safe(msr, a, b) < 0) { - printk(KERN_ERR - "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", + pr_err("MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", smp_processor_id(), msr, a, b); } } @@ -607,7 +606,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1ULL<<(hi - 1)) - 1); if (tmp != mask) { - printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); + pr_warn("mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); mask = tmp; } @@ -858,13 +857,13 @@ int generic_validate_add_page(unsigned long base, unsigned long size, boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_mask <= 7) { if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { - pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); + pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); return -EINVAL; } if (!(base + size < 0x70000 || base > 0x7003F) && (type == MTRR_TYPE_WRCOMB || type == MTRR_TYPE_WRBACK)) { - pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); + pr_warn("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); return -EINVAL; } } @@ -878,7 +877,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, lbase = lbase >> 1, last = last >> 1) ; if (lbase != last) { - pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); + pr_warn("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) 
boundary\n", base, size); return -EINVAL; } return 0; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 5c3d149ee91c..ba80d68f683e 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -300,24 +300,24 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; if (type >= MTRR_NUM_TYPES) { - pr_warning("mtrr: type: %u invalid\n", type); + pr_warn("mtrr: type: %u invalid\n", type); return -EINVAL; } /* If the type is WC, check that this processor supports it */ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - pr_warning("mtrr: your processor doesn't support write-combining\n"); + pr_warn("mtrr: your processor doesn't support write-combining\n"); return -ENOSYS; } if (!size) { - pr_warning("mtrr: zero sized request\n"); + pr_warn("mtrr: zero sized request\n"); return -EINVAL; } if ((base | (base + size - 1)) >> (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { - pr_warning("mtrr: base or size exceeds the MTRR width\n"); + pr_warn("mtrr: base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -348,7 +348,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, } else if (types_compatible(type, ltype)) continue; } - pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" + pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing" " 0x%lx000,0x%lx000\n", base, size, lbase, lsize); goto out; @@ -357,7 +357,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (ltype != type) { if (types_compatible(type, ltype)) continue; - pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", base, size, mtrr_attrib_to_str(ltype), mtrr_attrib_to_str(type)); goto out; @@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - pr_warning("mtrr: size and base must be 
multiples of 4 kiB\n"); + pr_warn("mtrr: size and base must be multiples of 4 kiB\n"); pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; @@ -493,16 +493,16 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg >= max) { - pr_warning("mtrr: register: %d too big\n", reg); + pr_warn("mtrr: register: %d too big\n", reg); goto out; } mtrr_if->get(reg, &lbase, &lsize, &ltype); if (lsize < 1) { - pr_warning("mtrr: MTRR %d not used\n", reg); + pr_warn("mtrr: MTRR %d not used\n", reg); goto out; } if (mtrr_usage_table[reg] < 1) { - pr_warning("mtrr: reg: %d has count=0\n", reg); + pr_warn("mtrr: reg: %d has count=0\n", reg); goto out; } if (--mtrr_usage_table[reg] < 1) diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 819d94982e07..f6f50c4ceaec 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -51,7 +51,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) for (i = 0; i < SANITY_CHECK_LOOPS; i++) { if (!rdrand_long(&tmp)) { clear_cpu_cap(c, X86_FEATURE_RDRAND); - printk_once(KERN_WARNING "rdrand: disabled\n"); + pr_warn_once("rdrand: disabled\n"); return; } } diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 4c60eaf0571c..cd531355e838 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -87,10 +87,10 @@ void detect_extended_topology(struct cpuinfo_x86 *c) c->x86_max_cores = (core_level_siblings / smp_num_siblings); if (!printed) { - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + pr_info("CPU: Physical Processor ID: %d\n", c->phys_proc_id); if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", + pr_info("CPU: Processor Core ID: %d\n", c->cpu_core_id); printed = 1; } diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 252da7aceca6..e3b4d1841175 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -33,7 +33,7 
@@ static void init_transmeta(struct cpuinfo_x86 *c) if (max >= 0x80860001) { cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags); if (cpu_rev != 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n", + pr_info("CPU: Processor revision %u.%u.%u.%u, %u MHz\n", (cpu_rev >> 24) & 0xff, (cpu_rev >> 16) & 0xff, (cpu_rev >> 8) & 0xff, @@ -44,10 +44,10 @@ static void init_transmeta(struct cpuinfo_x86 *c) if (max >= 0x80860002) { cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy); if (cpu_rev == 0x02000000) { - printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n", + pr_info("CPU: Processor revision %08X, %u MHz\n", new_cpu_rev, cpu_freq); } - printk(KERN_INFO "CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", + pr_info("CPU: Code Morphing Software revision %u.%u.%u-%u-%u\n", (cms_rev1 >> 24) & 0xff, (cms_rev1 >> 16) & 0xff, (cms_rev1 >> 8) & 0xff, @@ -76,7 +76,7 @@ static void init_transmeta(struct cpuinfo_x86 *c) (void *)&cpu_info[56], (void *)&cpu_info[60]); cpu_info[64] = '\0'; - printk(KERN_INFO "CPU: %s\n", cpu_info); + pr_info("CPU: %s\n", cpu_info); } /* Unhide possibly hidden capability flags */ diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 628a059a9a06..364e58346897 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -62,7 +62,7 @@ static unsigned long vmware_get_tsc_khz(void) tsc_hz = eax | (((uint64_t)ebx) << 32); do_div(tsc_hz, 1000); BUG_ON(tsc_hz >> 32); - printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n", + pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n", (unsigned long) tsc_hz / 1000, (unsigned long) tsc_hz % 1000); @@ -84,8 +84,7 @@ static void __init vmware_platform_setup(void) if (ebx != UINT_MAX) x86_platform.calibrate_tsc = vmware_get_tsc_khz; else - printk(KERN_WARNING - "Failed to get TSC freq from the hypervisor\n"); + pr_warn("Failed to get TSC freq from the hypervisor\n"); } /* diff --git 
a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 30ca7607cbbb..97340f2c437c 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -408,7 +408,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) processor.cpuflag = CPU_ENABLED; processor.cpufeature = (boot_cpu_data.x86 << 8) | (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; - processor.featureflag = boot_cpu_data.x86_capability[0]; + processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX]; processor.reserved[0] = 0; processor.reserved[1] = 0; for (i = 0; i < 2; i++) { diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4ba229ac3f4f..a9033ae13369 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1535,7 +1535,7 @@ __init void lguest_init(void) */ cpu_detect(&new_cpu_data); /* head.S usually sets up the first capability word, so do it here. */ - new_cpu_data.x86_capability[0] = cpuid_edx(1); + new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); /* Math is always hard! 
*/ set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d09e4c9d7cc5..2c261082eadf 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1654,7 +1654,7 @@ asmlinkage __visible void __init xen_start_kernel(void) cpu_detect(&new_cpu_data); set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU); new_cpu_data.wp_works_ok = 1; - new_cpu_data.x86_capability[0] = cpuid_edx(1); + new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1); #endif if (xen_start_info->mod_start) { diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c index 724a08740a04..9466354d3e49 100644 --- a/arch/x86/xen/pmu.c +++ b/arch/x86/xen/pmu.c @@ -11,7 +11,7 @@ #include "pmu.h" /* x86_pmu.handle_irq definition */ -#include "../kernel/cpu/perf_event.h" +#include "../events/perf_event.h" #define XENPMU_IRQ_PROCESSING 1 struct xenpmu { diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index c9956440d0e6..21b81a41dae5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -30,7 +30,7 @@ struct trace_kprobe { struct list_head list; struct kretprobe rp; /* Use rp.kp for kprobe use */ - unsigned long nhit; + unsigned long __percpu *nhit; const char *symbol; /* symbol name */ struct trace_probe tp; }; @@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, if (!tk) return ERR_PTR(ret); + tk->nhit = alloc_percpu(unsigned long); + if (!tk->nhit) + goto error; + if (symbol) { tk->symbol = kstrdup(symbol, GFP_KERNEL); if (!tk->symbol) @@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group, error: kfree(tk->tp.call.name); kfree(tk->symbol); + free_percpu(tk->nhit); kfree(tk); return ERR_PTR(ret); } @@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk) kfree(tk->tp.call.class->system); kfree(tk->tp.call.name); kfree(tk->symbol); + free_percpu(tk->nhit); kfree(tk); } @@ -874,9 +880,14 @@ static const struct file_operations 
kprobe_events_ops = { static int probes_profile_seq_show(struct seq_file *m, void *v) { struct trace_kprobe *tk = v; + unsigned long nhit = 0; + int cpu; + + for_each_possible_cpu(cpu) + nhit += *per_cpu_ptr(tk->nhit, cpu); seq_printf(m, " %-44s %15lu %15lu\n", - trace_event_name(&tk->tp.call), tk->nhit, + trace_event_name(&tk->tp.call), nhit, tk->rp.kp.nmissed); return 0; @@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); - tk->nhit++; + raw_cpu_inc(*tk->nhit); if (tk->tp.flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); @@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp); - tk->nhit++; + raw_cpu_inc(*tk->nhit); if (tk->tp.flags & TP_FLAG_TRACE) kretprobe_trace_func(tk, ri, regs); diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 02db3cdff20f..6b7707270aa3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -27,7 +27,7 @@ endef # the rule that uses them - an example for that is the 'bionic' # feature check. 
] # -FEATURE_TESTS ?= \ +FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ fortify-source \ @@ -46,6 +46,7 @@ FEATURE_TESTS ?= \ libpython \ libpython-version \ libslang \ + libcrypto \ libunwind \ pthread-attr-setaffinity-np \ stackprotector-all \ @@ -56,6 +57,25 @@ FEATURE_TESTS ?= \ get_cpuid \ bpf +# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list +# of all feature tests +FEATURE_TESTS_EXTRA := \ + bionic \ + compile-32 \ + compile-x32 \ + cplus-demangle \ + hello \ + libbabeltrace \ + liberty \ + liberty-z \ + libunwind-debug-frame + +FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC) + +ifeq ($(FEATURE_TESTS),all) + FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA) +endif + FEATURE_DISPLAY ?= \ dwarf \ glibc \ @@ -68,6 +88,7 @@ FEATURE_DISPLAY ?= \ libperl \ libpython \ libslang \ + libcrypto \ libunwind \ libdw-dwarf-unwind \ zlib \ @@ -100,6 +121,14 @@ ifeq ($(feature-all), 1) # test-all.c passed - just set all the core feature flags to 1: # $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) + # + # test-all.c does not comprise these tests, so we need to + # for this case to get features proper values + # + $(call feature_check,compile-32) + $(call feature_check,compile-x32) + $(call feature_check,bionic) + $(call feature_check,libbabeltrace) else $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) endif diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index bf8f0352264d..c5f4c417428d 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -23,6 +23,7 @@ FILES= \ test-libpython.bin \ test-libpython-version.bin \ test-libslang.bin \ + test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ test-pthread-attr-setaffinity-np.bin \ @@ -105,6 +106,9 @@ $(OUTPUT)test-libaudit.bin: $(OUTPUT)test-libslang.bin: $(BUILD) -I/usr/include/slang -lslang +$(OUTPUT)test-libcrypto.bin: + $(BUILD) -lcrypto + $(OUTPUT)test-gtk2.bin: $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags 
gtk+-2.0 2>/dev/null) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 81025cade45f..e499a36c1e4a 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -129,6 +129,10 @@ # include "test-bpf.c" #undef main +#define main main_test_libcrypto +# include "test-libcrypto.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -158,6 +162,7 @@ int main(int argc, char *argv[]) main_test_lzma(); main_test_get_cpuid(); main_test_bpf(); + main_test_libcrypto(); return 0; } diff --git a/tools/build/feature/test-compile.c b/tools/build/feature/test-compile.c index 31dbf45bf99c..c54e6551ae4c 100644 --- a/tools/build/feature/test-compile.c +++ b/tools/build/feature/test-compile.c @@ -1,4 +1,6 @@ +#include <stdio.h> int main(void) { + printf("Hello World!\n"); return 0; } diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c new file mode 100644 index 000000000000..bd79dc7f28d3 --- /dev/null +++ b/tools/build/feature/test-libcrypto.c @@ -0,0 +1,17 @@ +#include <openssl/sha.h> +#include <openssl/md5.h> + +int main(void) +{ + MD5_CTX context; + unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH]; + unsigned char dat[] = "12345"; + + MD5_Init(&context); + MD5_Update(&context, &dat[0], sizeof(dat)); + MD5_Final(&md[0], &context); + + SHA1(&dat[0], sizeof(dat), &md[0]); + + return 0; +} diff --git a/tools/lib/api/Build b/tools/lib/api/Build index e8b8a23b9bf4..954c644f7ad9 100644 --- a/tools/lib/api/Build +++ b/tools/lib/api/Build @@ -1,3 +1,4 @@ libapi-y += fd/ libapi-y += fs/ libapi-y += cpu.o +libapi-y += debug.o diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index d85904dc9b38..bbc82c614bee 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -18,6 +18,7 @@ LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC 
CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +CFLAGS += -I$(srctree)/tools/lib/api RM = rm -f diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h new file mode 100644 index 000000000000..188f7880eafe --- /dev/null +++ b/tools/lib/api/debug-internal.h @@ -0,0 +1,20 @@ +#ifndef __API_DEBUG_INTERNAL_H__ +#define __API_DEBUG_INTERNAL_H__ + +#include "debug.h" + +#define __pr(func, fmt, ...) \ +do { \ + if ((func)) \ + (func)("libapi: " fmt, ##__VA_ARGS__); \ +} while (0) + +extern libapi_print_fn_t __pr_warning; +extern libapi_print_fn_t __pr_info; +extern libapi_print_fn_t __pr_debug; + +#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) + +#endif /* __API_DEBUG_INTERNAL_H__ */ diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c new file mode 100644 index 000000000000..5fa5cf500a1f --- /dev/null +++ b/tools/lib/api/debug.c @@ -0,0 +1,28 @@ +#include <stdio.h> +#include <stdarg.h> +#include "debug.h" +#include "debug-internal.h" + +static int __base_pr(const char *format, ...) 
+{ + va_list args; + int err; + + va_start(args, format); + err = vfprintf(stderr, format, args); + va_end(args); + return err; +} + +libapi_print_fn_t __pr_warning = __base_pr; +libapi_print_fn_t __pr_info = __base_pr; +libapi_print_fn_t __pr_debug; + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug) +{ + __pr_warning = warn; + __pr_info = info; + __pr_debug = debug; +} diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h new file mode 100644 index 000000000000..a0872f68fc56 --- /dev/null +++ b/tools/lib/api/debug.h @@ -0,0 +1,10 @@ +#ifndef __API_DEBUG_H__ +#define __API_DEBUG_H__ + +typedef int (*libapi_print_fn_t)(const char *, ...); + +void libapi_set_print(libapi_print_fn_t warn, + libapi_print_fn_t info, + libapi_print_fn_t debug); + +#endif /* __API_DEBUG_H__ */ diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 459599d1b6c4..ef78c22ff44d 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -13,6 +13,7 @@ #include <sys/mount.h> #include "fs.h" +#include "debug-internal.h" #define _STR(x) #x #define STR(x) _STR(x) @@ -300,6 +301,56 @@ int filename__read_ull(const char *filename, unsigned long long *value) return err; } +#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ + +int filename__read_str(const char *filename, char **buf, size_t *sizep) +{ + size_t size = 0, alloc_size = 0; + void *bf = NULL, *nbf; + int fd, n, err = 0; + char sbuf[STRERR_BUFSIZE]; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -errno; + + do { + if (size == alloc_size) { + alloc_size += BUFSIZ; + nbf = realloc(bf, alloc_size); + if (!nbf) { + err = -ENOMEM; + break; + } + + bf = nbf; + } + + n = read(fd, bf + size, alloc_size - size); + if (n < 0) { + if (size) { + pr_warning("read failed %d: %s\n", errno, + strerror_r(errno, sbuf, sizeof(sbuf))); + err = 0; + } else + err = -errno; + + break; + } + + size += n; + } while (n > 0); + + if (!err) { + *sizep = size; + *buf = bf; 
+ } else + free(bf); + + close(fd); + return err; +} + int sysfs__read_ull(const char *entry, unsigned long long *value) { char path[PATH_MAX]; @@ -326,6 +377,19 @@ int sysfs__read_int(const char *entry, int *value) return filename__read_int(path, value); } +int sysfs__read_str(const char *entry, char **buf, size_t *sizep) +{ + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return -1; + + snprintf(path, sizeof(path), "%s/%s", sysfs, entry); + + return filename__read_str(path, buf, sizep); +} + int sysctl__read_int(const char *sysctl, int *value) { char path[PATH_MAX]; diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h index d024a7f682f6..9f6598098dc5 100644 --- a/tools/lib/api/fs/fs.h +++ b/tools/lib/api/fs/fs.h @@ -2,6 +2,7 @@ #define __API_FS__ #include <stdbool.h> +#include <unistd.h> /* * On most systems <limits.h> would have given us this, but not on some systems @@ -26,8 +27,10 @@ FS(tracefs) int filename__read_int(const char *filename, int *value); int filename__read_ull(const char *filename, unsigned long long *value); +int filename__read_str(const char *filename, char **buf, size_t *sizep); int sysctl__read_int(const char *sysctl, int *value); int sysfs__read_int(const char *entry, int *value); int sysfs__read_ull(const char *entry, unsigned long long *value); +int sysfs__read_str(const char *entry, char **buf, size_t *sizep); #endif /* __API_FS__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8334a5a9d5d7..7e543c3102d4 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -201,6 +201,7 @@ struct bpf_object { Elf_Data *data; } *reloc; int nr_reloc; + int maps_shndx; } efile; /* * All loaded bpf_object is linked in a list, which is @@ -350,6 +351,7 @@ static struct bpf_object *bpf_object__new(const char *path, */ obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; + obj->efile.maps_shndx = -1; obj->loaded = false; @@ -529,12 +531,12 @@ bpf_object__init_maps(struct 
bpf_object *obj, void *data, } static int -bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx) +bpf_object__init_maps_name(struct bpf_object *obj) { int i; Elf_Data *symbols = obj->efile.symbols; - if (!symbols || maps_shndx < 0) + if (!symbols || obj->efile.maps_shndx < 0) return -EINVAL; for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) { @@ -544,7 +546,7 @@ bpf_object__init_maps_name(struct bpf_object *obj, int maps_shndx) if (!gelf_getsym(symbols, i, &sym)) continue; - if (sym.st_shndx != maps_shndx) + if (sym.st_shndx != obj->efile.maps_shndx) continue; map_name = elf_strptr(obj->efile.elf, @@ -572,7 +574,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; Elf_Scn *scn = NULL; - int idx = 0, err = 0, maps_shndx = -1; + int idx = 0, err = 0; /* Elf is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { @@ -625,7 +627,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj) else if (strcmp(name, "maps") == 0) { err = bpf_object__init_maps(obj, data->d_buf, data->d_size); - maps_shndx = idx; + obj->efile.maps_shndx = idx; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -674,8 +676,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } - if (maps_shndx >= 0) - err = bpf_object__init_maps_name(obj, maps_shndx); + if (obj->efile.maps_shndx >= 0) + err = bpf_object__init_maps_name(obj); out: return err; } @@ -697,7 +699,8 @@ bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx) static int bpf_program__collect_reloc(struct bpf_program *prog, size_t nr_maps, GElf_Shdr *shdr, - Elf_Data *data, Elf_Data *symbols) + Elf_Data *data, Elf_Data *symbols, + int maps_shndx) { int i, nrels; @@ -724,9 +727,6 @@ bpf_program__collect_reloc(struct 
bpf_program *prog, return -LIBBPF_ERRNO__FORMAT; } - insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u\n", insn_idx); - if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { @@ -735,6 +735,15 @@ bpf_program__collect_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__FORMAT; } + if (sym.st_shndx != maps_shndx) { + pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n", + prog->section_name, sym.st_shndx); + return -LIBBPF_ERRNO__RELOC; + } + + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u\n", insn_idx); + if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", insn_idx, insns[insn_idx].code); @@ -863,7 +872,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) err = bpf_program__collect_reloc(prog, nr_maps, shdr, data, - obj->efile.symbols); + obj->efile.symbols, + obj->efile.maps_shndx); if (err) return err; } diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index b9ca1e304158..15949e2a7805 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -8,7 +8,7 @@ perf-config - Get and set variables in a configuration file. SYNOPSIS -------- [verse] -'perf config' -l | --list +'perf config' [<file-option>] -l | --list DESCRIPTION ----------- @@ -21,6 +21,14 @@ OPTIONS --list:: Show current config variables, name and value, for all sections. +--user:: + For writing and reading options: write to user + '$HOME/.perfconfig' file or read it. + +--system:: + For writing and reading options: write to system-wide + '$(sysconfdir)/perfconfig' or read it. + CONFIGURATION FILE ------------------ @@ -30,6 +38,10 @@ The '$HOME/.perfconfig' file is used to store a per-user configuration. The file '$(sysconfdir)/perfconfig' can be used to store a system-wide default configuration. 
+When reading or writing, the values are read from the system and user +configuration files by default, and options '--system' and '--user' +can be used to tell the command to read from or write to only that location. + Syntax ~~~~~~ @@ -62,7 +74,7 @@ Given a $HOME/.perfconfig like this: medium = green, default normal = lightgray, default selected = white, lightgray - code = blue, default + jump_arrows = blue, default addr = magenta, default root = white, blue @@ -98,6 +110,347 @@ Given a $HOME/.perfconfig like this: order = caller sort-key = function +Variables +~~~~~~~~~ + +colors.*:: + The variables for customizing the colors used in the output for the + 'report', 'top' and 'annotate' in the TUI. They should specify the + foreground and background colors, separated by a comma, for example: + + medium = green, lightgray + + If you want to use the color configured for your terminal, just leave it + as 'default', for example: + + medium = default, lightgray + + Available colors: + red, yellow, green, cyan, gray, black, blue, + white, default, magenta, lightgray + + colors.top:: + 'top' means an overhead percentage which is more than 5%. + And values of this variable specify percentage colors. + Basic key values are foreground-color 'red' and + background-color 'default'. + colors.medium:: + 'medium' means an overhead percentage which is more than 0.5%. + Default values are 'green' and 'default'. + colors.normal:: + 'normal' means the rest of overhead percentages + except 'top', 'medium', 'selected'. + Default values are 'lightgray' and 'default'. + colors.selected:: + This selects the colors for the current entry in a list of entries + from sub-commands (top, report, annotate). + Default values are 'black' and 'lightgray'. + colors.jump_arrows:: + Colors for jump arrows on assembly code listings + such as 'jns', 'jmp', 'jane', etc. + Default values are 'blue', 'default'. + colors.addr:: + This selects colors for addresses from 'annotate'.
+ Default values are 'magenta', 'default'. + colors.root:: + Colors for headers in the output of sub-commands (top, report). + Default values are 'white', 'blue'. + +tui.*, gtk.*:: + Subcommands that can be configured here are 'top', 'report' and 'annotate'. + These values are booleans, for example: + + [tui] + top = true + + will make the TUI be the default for the 'top' subcommand. Those will be + available if the required libs were detected at tool build time. + +buildid.*:: + buildid.dir:: + Each executable and shared library in modern distributions comes with a + content based identifier that, if available, will be inserted in a + 'perf.data' file header to, at analysis time, find what is needed to do + symbol resolution, code annotation, etc. + + The recording tools also store a hard link or copy in a per-user + directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms + and /proc/kcore files to be used at analysis time. + + The buildid.dir variable can be used to either change this directory + cache location, or to disable it altogether. If you want to disable it, + set buildid.dir to /dev/null. The default is $HOME/.debug + +annotate.*:: + These options work only for TUI. + These are in control of addresses, jump function, source code + in lines of assembly code from a specific program. + + annotate.hide_src_code:: + If a program which is analyzed has source code, + this option lets 'annotate' print a list of assembly code with the source code. + For example, let's see a part of a program. There're four lines. + If this option is 'true', they can be printed + without source code from a program as below. + + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ mov (%rdi),%rdx + + But if this option is 'false', source code of the part + can also be printed as below. Default is 'false'.
+ + │ struct rb_node *rb_next(const struct rb_node *node) + │ { + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ struct rb_node *parent; + │ + │ if (RB_EMPTY_NODE(node)) + │ mov (%rdi),%rdx + │ return n; + + annotate.use_offset:: + Based on the first address of a loaded function, offset can be used. + Instead of using original addresses of assembly code, + addresses subtracted from a base address can be printed. + Let's illustrate an example. + If a base address is 0XFFFFFFFF81624d50 as below, + + ffffffff81624d50 <load0> + + an address on assembly code has a specific absolute address as below + + ffffffff816250b8:│ mov 0x8(%r14),%rdi + + but if use_offset is 'true', an address subtracted from a base address is printed. + Default is true. This option is only applied to TUI. + + 368:│ mov 0x8(%r14),%rdi + + annotate.jump_arrows:: + There can be jump instructions among the assembly code. + Depending on a boolean value of jump_arrows, + arrows can be printed or not which represent + where the instruction jumps to as below. + + │ ┌──jmp 1333 + │ │ xchg %ax,%ax + │1330:│ mov %r15,%r10 + │1333:└─→cmp %r15,%r14 + + If jump_arrows is 'false', the arrows aren't printed as below. + Default is 'false'. + + │ ↓ jmp 1333 + │ xchg %ax,%ax + │1330: mov %r15,%r10 + │1333: cmp %r15,%r14 + + annotate.show_linenr:: + When showing source code if this option is 'true', + line numbers are printed as below. + + │1628 if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │1628 data->id = *array; + │1629 array++; + │1630 } + + However if this option is 'false', they aren't printed as below. + Default is 'false'. + + │ if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │ data->id = *array; + │ array++; + │ } + + annotate.show_nr_jumps:: + Let's see a part of assembly code. + + │1382: movb $0x1,-0x270(%rbp) + + If this option is used, the number of branches jumping to that address can be printed as below. + Default is 'false'.
+ + │1 1382: movb $0x1,-0x270(%rbp) + + annotate.show_total_period:: + To compare two records on an instruction base, with this option + provided, display total number of samples that belong to a line + in assembly code. If this option is 'true', total periods are printed + instead of percent values as below. + + 302 │ mov %eax,%eax + + But if this option is 'false', percent values for overhead are printed as below. + Default is 'false'. + + 99.93 │ mov %eax,%eax + +hist.*:: + hist.percentage:: + This option controls the way to calculate overhead of filtered entries - + that means the value of this option is effective only if there's a + filter (by comm, dso or symbol name). Suppose the following example: + + Overhead Symbols + ........ ....... + 33.33% foo + 33.33% bar + 33.33% baz + + This is an original overhead and we'll filter out the first 'foo' + entry. The value of 'relative' would increase the overhead of 'bar' + and 'baz' to 50.00% for each, while 'absolute' would show their + current overhead (33.33%). + +ui.*:: + ui.show-headers:: + This option controls display of column headers (like 'Overhead' and 'Symbol') + in 'report' and 'top'. If this option is false, they are hidden. + This option is only applied to TUI. + +call-graph.*:: + When sub-commands 'top' and 'report' work with -g/--children + there're options in control of call-graph. + + call-graph.record-mode:: + The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'. + The value of 'dwarf' is effective only if perf detects the needed library + (libunwind or a recent version of libdw). + 'lbr' only works for cpus that support it. + + call-graph.dump-size:: + The size of stack to dump in order to do post-unwinding. Default is 8192 (byte). + When using dwarf into record-mode, the default size will be used if omitted. + + call-graph.print-type:: + The print-types can be graph (graph absolute), fractal (graph relative), + flat and folded. This option controls a way to show overhead for each callchain + entry.
Suppose the following example. + + Overhead Symbols + ........ ....... + 40.00% foo + | + ---foo + | + |--50.00%--bar + | main + | + --50.00%--baz + main + + This output is a 'fractal' format. The 'foo' came from 'bar' and 'baz' exactly + half and half so 'fractal' shows 50.00% for each + (meaning that it assumes 100% total overhead of 'foo'). + + The 'graph' uses absolute overhead value of 'foo' as total so each of + 'bar' and 'baz' callchain will have 20.00% of overhead. + If 'flat' is used, call chains are displayed in a single column with linear exposure. + 'folded' means call chains are displayed in a line, separated by semicolons. + + call-graph.order:: + This option controls print order of callchains. The default is + 'callee' which means callee is printed at top and then followed by its + caller and so on. The 'caller' prints it in reverse order. + + If this option is not set and report.children or top.children is + set to true (or the equivalent command line option is given), + the default value of this option is changed to 'caller' for the + execution of 'perf report' or 'perf top'. Other commands will + still default to 'callee'. + + call-graph.sort-key:: + The callchains are merged if they contain the same information. + The sort-key option determines a way to compare the callchains. + A value of 'sort-key' can be 'function' or 'address'. + The default is 'function'. + + call-graph.threshold:: + When there're many callchains it'd print tons of lines. So perf omits + small callchains under a certain overhead (threshold) and this option + controls the threshold. Default is 0.5 (%). The overhead is calculated + by a value that depends on call-graph.print-type. + + call-graph.print-limit:: + This is a maximum number of lines of callchain printed for a single + histogram entry. Default is 0 which means no limitation. + +report.*:: + report.percent-limit:: + This one is mostly the same as call-graph.threshold but works for + histogram entries.
Entries having an overhead lower than this + percentage will not be printed. Default is '0'. If percent-limit + is '10', only entries which have more than 10% of overhead will be + printed. + + report.queue-size:: + This option sets up the maximum allocation size of the internal + event queue for ordering events. Default is 0, meaning no limit. + + report.children:: + 'Children' means functions called from another function. + If this option is true, 'perf report' cumulates callchains of children + and shows (accumulated) total overhead as well as 'Self' overhead. + Please refer to the 'perf report' manual. The default is 'true'. + + report.group:: + This option is to show event group information together. + Example output with this turned on, notice that there is one column + per event in the group, ref-cycles and cycles: + + # group: {ref-cycles,cycles} + # ======== + # + # Samples: 7K of event 'anon group { ref-cycles, cycles }' + # Event count (approx.): 6876107743 + # + # Overhead Command Shared Object Symbol + # ................ ....... ................. ................... + # + 99.84% 99.76% noploop noploop [.] main + 0.07% 0.00% noploop ld-2.15.so [.] strcmp + 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del + +top.*:: + top.children:: + Same as 'report.children'. So if it is enabled, the output of 'top' + command will have 'Children' overhead column as well as 'Self' overhead + column by default. + The default is 'true'. + +man.*:: + man.viewer:: + This option can assign a tool to view manual pages when the 'help' + subcommand is invoked. Supported tools are 'man', 'woman' + (with emacs client) and 'konqueror'. Default is 'man'. + + A new man viewer tool can also be added using 'man.<tool>.cmd', + or a different path can be used via the 'man.<tool>.path' config option. + +pager.*:: + pager.<subcommand>:: + When the subcommand is run on stdio, determine whether it uses + pager or not based on this value. Default is 'unspecified'.
+ +kmem.*:: + kmem.default:: + This option decides which allocator is to be analyzed if neither + '--slab' nor '--page' option is used. Default is 'slab'. + +record.*:: + record.build-id:: + This option can be 'cache', 'no-cache' or 'skip'. + 'cache' is to post-process data and save/update the binaries into + the build-id cache (in ~/.debug). This is the default. + But if this option is 'no-cache', it will not update the build-id cache. + 'skip' skips post-processing and does not update the cache. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0b1cedeef895..87b2588d1cbd 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -53,6 +53,13 @@ include::itrace.txt[] --strip:: Use with --itrace to strip out non-synthesized events. +-j:: +--jit:: + Process jitdump files by injecting the mmap records corresponding to jitted + functions. This option also generates the ELF images for each jitted function + found in the jitdump files captured in the input perf.data file. Use this option + if you are monitoring an environment using JIT runtimes, such as Java, DART or V8. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 8a301f6afb37..89cab84e92fd 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -117,6 +117,22 @@ OPTIONS And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. + If the --mem-mode option is used, the following sort keys are also available + (incompatible with --branch-stack): + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+ + - symbol_daddr: name of data symbol being executed on at the time of sample + - dso_daddr: name of library or module containing the data being executed + on at the time of the sample + - locked: whether the bus was locked at the time of the sample + - tlb: type of tlb access for the data at the time of the sample + - mem: type of memory access for the data at the time of the sample + - snoop: type of snoop (if any) for the data at the time of the sample + - dcacheline: the cacheline the data address is on at the time of the sample + + And the default sort keys are changed to local_weight, mem, sym, dso, + symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. + If the data file has tracepoint event(s), following (dynamic) sort keys are also available: trace, trace_fields, [<event>.]<field>[/raw] @@ -151,22 +167,6 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. - If --mem-mode option is used, following sort keys are also available - (incompatible with --branch-stack): - symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. - - - symbol_daddr: name of data symbol being executed on at the time of sample - - dso_daddr: name of library or module containing the data being executed - on at the time of sample - - locked: whether the bus was locked at the time of sample - - tlb: type of tlb access for the data at the time of sample - - mem: type of memory access for the data at the time of sample - - snoop: type of snoop (if any) for the data at the time of sample - - dcacheline: the cacheline the data address is on at the time of sample - - And default sort keys are changed to local_weight, mem, sym, dso, - symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. - -p:: --parent=<regex>:: A regex filter to identify parent. The parent is a caller of this @@ -351,7 +351,10 @@ OPTIONS --percent-limit:: Do not show entries which have an overhead under that percent. - (Default: 0). + (Default: 0). 
Note that this option also sets the percent limit (threshold) + of callchains. However the default value of callchain threshold is + different than the default value of hist entries. Please see the + --call-graph option for details. --percentage:: Determine how to display the overhead percentage of filtered entries. diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example index 767ea2436e1c..1d8d5bc4cd2d 100644 --- a/tools/perf/Documentation/perfconfig.example +++ b/tools/perf/Documentation/perfconfig.example @@ -5,7 +5,7 @@ medium = green, lightgray normal = black, lightgray selected = lightgray, magenta - code = blue, lightgray + jump_arrows = blue, lightgray addr = magenta, lightgray [tui] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcd9a70c7193..32a64e619028 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -68,6 +68,20 @@ all tags TAGS: $(print_msg) $(make) +ifdef MAKECMDGOALS +has_clean := 0 +ifneq ($(filter clean,$(MAKECMDGOALS)),) + has_clean := 1 +endif # clean + +ifeq ($(has_clean),1) + rest := $(filter-out clean,$(MAKECMDGOALS)) + ifneq ($(rest),) +$(rest): clean + endif # rest +endif # has_clean +endif # MAKECMDGOALS + # # The clean target is not really parallel, don't print the jobs info: # @@ -75,10 +89,17 @@ clean: $(make) # -# The build-test target is not really parallel, don't print the jobs info: +# The build-test target is not really parallel, don't print the jobs info, +# it also uses only the tests/make targets that don't pollute the source +# repository, i.e. that uses O= or builds the tarpkg outside the source +# repo directories. 
+# +# For a full test, use: +# +# make -C tools/perf -f tests/make # build-test: - @$(MAKE) SHUF=1 -f tests/make --no-print-directory + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out # # All other targets get passed through: diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5d34815c7ccb..4a4fad4182f5 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -58,6 +58,9 @@ include config/utilities.mak # # Define NO_LIBBIONIC if you do not want bionic support # +# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support +# used for generating build-ids for ELFs generated by jitdump. +# # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support # for dwarf backtrace post unwind. # @@ -136,6 +139,8 @@ $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) $(call allow-override,LD,$(CROSS_COMPILE)ld) +LD += $(EXTRA_LDFLAGS) + PKG_CONFIG = $(CROSS_COMPILE)pkg-config RM = rm -f @@ -165,7 +170,16 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) endif endif +# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. +# Without this setting the output feature dump file misses some features, for +# example, liberty. Select all checkers so we won't get an incomplete feature +# dump file. 
ifeq ($(config),1) +ifdef MAKECMDGOALS +ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump) +FEATURE_TESTS := all +endif +endif include config/Makefile endif @@ -618,7 +632,7 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ - $(OUTPUT)tests/llvm-src-{base,kbuild,prologue}.c + $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 7fbca175099e..9f9cea3478fd 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,3 +1,5 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +HAVE_KVM_STAT_SUPPORT := 1 diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7b8b0d1a1b62..c8fe2074d217 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,6 @@ libperf-y += header.o libperf-y += sym-handling.o +libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h new file mode 100644 index 000000000000..0dd6b7f2d44f --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h @@ -0,0 +1,123 @@ +#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H +#define ARCH_PERF_BOOK3S_HV_HCALLS_H + +/* + * PowerPC HCALL codes : hcall code to name mapping + */ +#define kvm_trace_symbol_hcall \ + {0x4, "H_REMOVE"}, \ + {0x8, "H_ENTER"}, \ + {0xc, "H_READ"}, \ + {0x10, "H_CLEAR_MOD"}, 
\ + {0x14, "H_CLEAR_REF"}, \ + {0x18, "H_PROTECT"}, \ + {0x1c, "H_GET_TCE"}, \ + {0x20, "H_PUT_TCE"}, \ + {0x24, "H_SET_SPRG0"}, \ + {0x28, "H_SET_DABR"}, \ + {0x2c, "H_PAGE_INIT"}, \ + {0x30, "H_SET_ASR"}, \ + {0x34, "H_ASR_ON"}, \ + {0x38, "H_ASR_OFF"}, \ + {0x3c, "H_LOGICAL_CI_LOAD"}, \ + {0x40, "H_LOGICAL_CI_STORE"}, \ + {0x44, "H_LOGICAL_CACHE_LOAD"}, \ + {0x48, "H_LOGICAL_CACHE_STORE"}, \ + {0x4c, "H_LOGICAL_ICBI"}, \ + {0x50, "H_LOGICAL_DCBF"}, \ + {0x54, "H_GET_TERM_CHAR"}, \ + {0x58, "H_PUT_TERM_CHAR"}, \ + {0x5c, "H_REAL_TO_LOGICAL"}, \ + {0x60, "H_HYPERVISOR_DATA"}, \ + {0x64, "H_EOI"}, \ + {0x68, "H_CPPR"}, \ + {0x6c, "H_IPI"}, \ + {0x70, "H_IPOLL"}, \ + {0x74, "H_XIRR"}, \ + {0x78, "H_MIGRATE_DMA"}, \ + {0x7c, "H_PERFMON"}, \ + {0xdc, "H_REGISTER_VPA"}, \ + {0xe0, "H_CEDE"}, \ + {0xe4, "H_CONFER"}, \ + {0xe8, "H_PROD"}, \ + {0xec, "H_GET_PPP"}, \ + {0xf0, "H_SET_PPP"}, \ + {0xf4, "H_PURR"}, \ + {0xf8, "H_PIC"}, \ + {0xfc, "H_REG_CRQ"}, \ + {0x100, "H_FREE_CRQ"}, \ + {0x104, "H_VIO_SIGNAL"}, \ + {0x108, "H_SEND_CRQ"}, \ + {0x110, "H_COPY_RDMA"}, \ + {0x114, "H_REGISTER_LOGICAL_LAN"}, \ + {0x118, "H_FREE_LOGICAL_LAN"}, \ + {0x11c, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {0x120, "H_SEND_LOGICAL_LAN"}, \ + {0x124, "H_BULK_REMOVE"}, \ + {0x130, "H_MULTICAST_CTRL"}, \ + {0x134, "H_SET_XDABR"}, \ + {0x138, "H_STUFF_TCE"}, \ + {0x13c, "H_PUT_TCE_INDIRECT"}, \ + {0x14c, "H_CHANGE_LOGICAL_LAN_MAC"}, \ + {0x150, "H_VTERM_PARTNER_INFO"}, \ + {0x154, "H_REGISTER_VTERM"}, \ + {0x158, "H_FREE_VTERM"}, \ + {0x15c, "H_RESET_EVENTS"}, \ + {0x160, "H_ALLOC_RESOURCE"}, \ + {0x164, "H_FREE_RESOURCE"}, \ + {0x168, "H_MODIFY_QP"}, \ + {0x16c, "H_QUERY_QP"}, \ + {0x170, "H_REREGISTER_PMR"}, \ + {0x174, "H_REGISTER_SMR"}, \ + {0x178, "H_QUERY_MR"}, \ + {0x17c, "H_QUERY_MW"}, \ + {0x180, "H_QUERY_HCA"}, \ + {0x184, "H_QUERY_PORT"}, \ + {0x188, "H_MODIFY_PORT"}, \ + {0x18c, "H_DEFINE_AQP1"}, \ + {0x190, "H_GET_TRACE_BUFFER"}, \ + {0x194, "H_DEFINE_AQP0"}, \ + {0x198, "H_RESIZE_MR"}, \ 
+ {0x19c, "H_ATTACH_MCQP"}, \ + {0x1a0, "H_DETACH_MCQP"}, \ + {0x1a4, "H_CREATE_RPT"}, \ + {0x1a8, "H_REMOVE_RPT"}, \ + {0x1ac, "H_REGISTER_RPAGES"}, \ + {0x1b0, "H_DISABLE_AND_GETC"}, \ + {0x1b4, "H_ERROR_DATA"}, \ + {0x1b8, "H_GET_HCA_INFO"}, \ + {0x1bc, "H_GET_PERF_COUNT"}, \ + {0x1c0, "H_MANAGE_TRACE"}, \ + {0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {0x1d8, "H_POLL_PENDING"}, \ + {0x1e4, "H_QUERY_INT_STATE"}, \ + {0x244, "H_ILLAN_ATTRIBUTES"}, \ + {0x250, "H_MODIFY_HEA_QP"}, \ + {0x254, "H_QUERY_HEA_QP"}, \ + {0x258, "H_QUERY_HEA"}, \ + {0x25c, "H_QUERY_HEA_PORT"}, \ + {0x260, "H_MODIFY_HEA_PORT"}, \ + {0x264, "H_REG_BCMC"}, \ + {0x268, "H_DEREG_BCMC"}, \ + {0x26c, "H_REGISTER_HEA_RPAGES"}, \ + {0x270, "H_DISABLE_AND_GET_HEA"}, \ + {0x274, "H_GET_HEA_INFO"}, \ + {0x278, "H_ALLOC_HEA_RESOURCE"}, \ + {0x284, "H_ADD_CONN"}, \ + {0x288, "H_DEL_CONN"}, \ + {0x298, "H_JOIN"}, \ + {0x2a4, "H_VASI_STATE"}, \ + {0x2b0, "H_ENABLE_CRQ"}, \ + {0x2b8, "H_GET_EM_PARMS"}, \ + {0x2d0, "H_SET_MPP"}, \ + {0x2d4, "H_GET_MPP"}, \ + {0x2ec, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {0x2f4, "H_BEST_ENERGY"}, \ + {0x2fc, "H_XIRR_X"}, \ + {0x300, "H_RANDOM"}, \ + {0x304, "H_COP"}, \ + {0x314, "H_GET_MPP_X"}, \ + {0x31c, "H_SET_MODE"}, \ + {0xf000, "H_RTAS"} \ + +#endif diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h new file mode 100644 index 000000000000..e68ba2da8970 --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h @@ -0,0 +1,33 @@ +#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H +#define ARCH_PERF_BOOK3S_HV_EXITS_H + +/* + * PowerPC Interrupt vectors : exit code to name mapping + */ + +#define kvm_trace_symbol_exit \ + {0x0, "RETURN_TO_HOST"}, \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ 
+ {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c new file mode 100644 index 000000000000..74eee30398f8 --- /dev/null +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -0,0 +1,170 @@ +#include "util/kvm-stat.h" +#include "util/parse-events.h" +#include "util/debug.h" + +#include "book3s_hv_exits.h" +#include "book3s_hcalls.h" + +#define NR_TPS 4 + +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 40; +const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter"; +const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit"; + +define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit); +define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall); + +/* Tracepoints specific to ppc_book3s_hv */ +const char *ppc_book3s_hv_kvm_tp[] = { + "kvm_hv:kvm_guest_enter", + "kvm_hv:kvm_guest_exit", + "kvm_hv:kvm_hcall_enter", + "kvm_hv:kvm_hcall_exit", + NULL, +}; + +/* 1 extra placeholder for NULL */ +const char *kvm_events_tp[NR_TPS + 1]; +const char *kvm_exit_reason; + +static void hcall_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = perf_evsel__intval(evsel, sample, "req"); +} + +static const char *get_hcall_exit_reason(u64 exit_code) +{ + struct exit_reasons_table *tbl = hcall_reasons; + + while (tbl->reason != NULL) { + if (tbl->exit_code == exit_code) + return tbl->reason; + tbl++; + } + + pr_debug("Unknown hcall code: %lld\n", + (unsigned long long)exit_code); + return "UNKNOWN"; +} + +static bool hcall_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key 
__maybe_unused) +{ + return (!strcmp(evsel->name, kvm_events_tp[3])); +} + +static bool hcall_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_events_tp[2])) { + hcall_event_get_key(evsel, sample, key); + return true; + } + + return false; +} +static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + const char *hcall_reason = get_hcall_exit_reason(key->key); + + scnprintf(decode, decode_str_len, "%s", hcall_reason); +} + +static struct kvm_events_ops hcall_events = { + .is_begin_event = hcall_event_begin, + .is_end_event = hcall_event_end, + .decode_key = hcall_event_decode_key, + .name = "HCALL-EVENT", +}; + +static struct kvm_events_ops exit_events = { + .is_begin_event = exit_event_begin, + .is_end_event = exit_event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events }, + { .name = "hcall", .ops = &hcall_events }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + + +static int is_tracepoint_available(const char *str, struct perf_evlist *evlist) +{ + struct parse_events_error err; + int ret; + + err.str = NULL; + ret = parse_events(evlist, str, &err); + if (err.str) + pr_err("%s : %s\n", str, err.str); + return ret; +} + +static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm, + struct perf_evlist *evlist) +{ + const char **events_ptr; + int i, nr_tp = 0, err = -1; + + /* Check for book3s_hv tracepoints */ + for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) { + err = is_tracepoint_available(*events_ptr, evlist); + if (err) + return -1; + nr_tp++; + } + + for (i = 0; i < nr_tp; i++) + kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i]; + + kvm_events_tp[i] = NULL; + kvm_exit_reason = "trap"; + kvm->exit_reasons = hv_exit_reasons; + kvm->exit_reasons_isa = "HV"; + + return 
0; +} + +/* Wrapper to setup kvm tracepoints */ +static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm) +{ + struct perf_evlist *evlist = perf_evlist__new(); + + if (evlist == NULL) + return -ENOMEM; + + /* Right now, only supported on book3s_hv */ + return ppc__setup_book3s_hv(kvm, evlist); +} + +int setup_kvm_events_tp(struct perf_kvm_stat *kvm) +{ + return ppc__setup_kvm_tp(kvm); +} + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + int ret; + + ret = ppc__setup_kvm_tp(kvm); + if (ret) { + kvm->exit_reasons = NULL; + kvm->exit_reasons_isa = NULL; + } + + return ret; +} diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index a5dbc07ec9dc..ed57df2e6d68 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -10,7 +10,7 @@ */ #include "../../util/kvm-stat.h" -#include <asm/kvm_perf.h> +#include <asm/sie.h> define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes); @@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes); define_exit_reasons_table(sie_diagnose_codes, diagnose_codes); define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes); +const char *vcpu_id_str = "id"; +const int decode_str_len = 40; +const char *kvm_exit_reason = "icptcode"; +const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter"; +const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit"; + static void event_icpt_insn_get_key(struct perf_evsel *evsel, struct perf_sample *sample, struct event_key *key) @@ -73,7 +79,7 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_s390_sie_enter", "kvm:kvm_s390_sie_exit", "kvm:kvm_s390_intercept_instruction", diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 8d8150f1cf9b..d66f9ad4df2e 100644 --- 
a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -60,7 +60,9 @@ struct branch { u64 misc; }; -static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_BTS_AUXTRACE_PRIV_SIZE; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index f05daacc9e78..a3395179c9ee 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -89,7 +89,7 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats, *config = attr.config; out_free: - parse_events__free_terms(terms); + parse_events_terms__delete(terms); return err; } @@ -273,7 +273,9 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) return attr; } -static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_PT_AUXTRACE_PRIV_SIZE; } diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 14e4e668fad7..b63d4be655a2 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,5 +1,7 @@ #include "../../util/kvm-stat.h" -#include <asm/kvm_perf.h> +#include <asm/svm.h> +#include <asm/vmx.h> +#include <asm/kvm.h> define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS); @@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "exit_reason"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + /* * For the mmio events, we treat: * the time of MMIO write: 
kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry @@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#lx:%s", + scnprintf(decode, decode_str_len, "%#lx:%s", (unsigned long)key->key, key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); } @@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#llx:%s", + scnprintf(decode, decode_str_len, "%#llx:%s", (unsigned long long)key->key, key->info ? "POUT" : "PIN"); } @@ -121,7 +129,7 @@ static struct kvm_events_ops ioport_events = { .name = "IO Port Access" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_entry", "kvm:kvm_exit", "kvm:kvm_mmio", diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index cc5c1267c738..cfe366375c4b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -245,7 +245,7 @@ static int __cmd_annotate(struct perf_annotate *ann) hists__collapse_resort(hists, NULL); /* Don't sort callchain */ perf_evsel__reset_sample_bit(pos, CALLCHAIN); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(pos, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d93bff7fc0e4..632efc6b79a0 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -38,19 +38,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) static int build_id_cache__kcore_dir(char *dir, size_t sz) { - struct timeval tv; - struct tm tm; - char dt[32]; - - if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) - return -1; - - if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) - return -1; - - scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 
10000); - - return 0; + return fetch_current_timestamp(dir, sz); } static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index f04e804a9fad..c42448ed5dfe 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -13,8 +13,10 @@ #include "util/util.h" #include "util/debug.h" +static bool use_system_config, use_user_config; + static const char * const config_usage[] = { - "perf config [options]", + "perf config [<file-option>] [options]", NULL }; @@ -25,6 +27,8 @@ enum actions { static struct option config_options[] = { OPT_SET_UINT('l', "list", &actions, "show current config variables", ACTION_LIST), + OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"), + OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"), OPT_END() }; @@ -42,10 +46,23 @@ static int show_config(const char *key, const char *value, int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) { int ret = 0; + char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); argc = parse_options(argc, argv, config_options, config_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (use_system_config && use_user_config) { + pr_err("Error: only one config file at a time\n"); + parse_options_usage(config_usage, config_options, "user", 0); + parse_options_usage(NULL, config_options, "system", 0); + return -1; + } + + if (use_system_config) + config_exclusive_filename = perf_etc_perfconfig(); + else if (use_user_config) + config_exclusive_filename = user_config; + switch (actions) { case ACTION_LIST: if (argc) { @@ -53,9 +70,13 @@ int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) parse_options_usage(config_usage, config_options, "l", 1); } else { ret = perf_config(show_config, NULL); - if (ret < 0) + if (ret < 0) { + const char * config_filename = config_exclusive_filename; + if (!config_exclusive_filename) + config_filename = 
user_config; pr_err("Nothing configured, " - "please check your ~/.perfconfig file\n"); + "please check your %s \n", config_filename); + } } break; default: diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 0022e02ed31a..b38445f08c2f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -17,6 +17,7 @@ #include "util/build-id.h" #include "util/data.h" #include "util/auxtrace.h" +#include "util/jit.h" #include <subcmd/parse-options.h> @@ -29,6 +30,7 @@ struct perf_inject { bool sched_stat; bool have_auxtrace; bool strip; + bool jit_mode; const char *input_name; struct perf_data_file output; u64 bytes_written; @@ -71,6 +73,15 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +#ifdef HAVE_LIBELF_SUPPORT +static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct ordered_events *oe __maybe_unused) +{ + return 0; +} +#endif + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -234,6 +245,27 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } +#ifdef HAVE_LIBELF_SUPPORT +static int perf_event__jit_repipe_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + u64 n = 0; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + if (!jit_process(inject->session, &inject->output, machine, + event->mmap.filename, sample->pid, &n)) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -247,6 +279,27 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return 
err; } +#ifdef HAVE_LIBELF_SUPPORT +static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + u64 n = 0; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + if (!jit_process(inject->session, &inject->output, machine, + event->mmap2.filename, sample->pid, &n)) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap2(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -664,6 +717,23 @@ static int __cmd_inject(struct perf_inject *inject) return ret; } +#ifdef HAVE_LIBELF_SUPPORT +static int +jit_validate_events(struct perf_session *session) +{ + struct perf_evsel *evsel; + + /* + * check that all events use CLOCK_MONOTONIC + */ + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC) + return -1; + } + return 0; +} +#endif + int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_inject inject = { @@ -703,7 +773,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) }; int ret; - const struct option options[] = { + struct option options[] = { OPT_BOOLEAN('b', "build-ids", &inject.build_ids, "Inject build-ids into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", @@ -713,6 +783,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " "where and how long tasks slept"), + OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", 
&symbol_conf.kallsyms_name, "file", @@ -729,7 +800,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "perf inject [<options>]", NULL }; - +#ifndef HAVE_LIBELF_SUPPORT + set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); +#endif argc = parse_options(argc, argv, options, inject_usage, 0); /* @@ -755,6 +828,36 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (inject.session == NULL) return -1; + if (inject.build_ids) { + /* + * to make sure the mmap records are ordered correctly + * and so that the correct especially due to jitted code + * mmaps. We cannot generate the buildid hit list and + * inject the jit mmaps at the same time for now. + */ + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + } +#ifdef HAVE_LIBELF_SUPPORT + if (inject.jit_mode) { + /* + * validate event is using the correct clockid + */ + if (jit_validate_events(inject.session)) { + fprintf(stderr, "error, jitted code must be sampled with perf record -k 1\n"); + return -1; + } + inject.tool.mmap2 = perf_event__jit_repipe_mmap2; + inject.tool.mmap = perf_event__jit_repipe_mmap; + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + /* + * JIT MMAP injection injects all MMAP events in one go, so it + * does not obey finished_round semantics. 
+ */ + inject.tool.finished_round = perf_event__drop_oe; + } +#endif ret = symbol__init(&inject.session->header.env); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 4418d9214872..bff666458b28 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -30,7 +30,6 @@ #include <math.h> #ifdef HAVE_KVM_STAT_SUPPORT -#include <asm/kvm_perf.h> #include "util/kvm-stat.h" void exit_event_get_key(struct perf_evsel *evsel, @@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON); + key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason); } bool kvm_exit_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_EXIT_TRACE); + return !strcmp(evsel->name, kvm_exit_trace); } bool exit_event_begin(struct perf_evsel *evsel, @@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel, bool kvm_entry_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_ENTRY_TRACE); + return !strcmp(evsel->name, kvm_entry_trace); } bool exit_event_end(struct perf_evsel *evsel, @@ -91,7 +90,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, const char *exit_reason = get_exit_reason(kvm, key->exit_reasons, key->key); - scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason); + scnprintf(decode, decode_str_len, "%s", exit_reason); } static bool register_kvm_events_ops(struct perf_kvm_stat *kvm) @@ -357,7 +356,7 @@ static bool handle_end_event(struct perf_kvm_stat *kvm, time_diff = sample->time - time_begin; if (kvm->duration && time_diff > kvm->duration) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; kvm->events_ops->decode_key(kvm, &event->key, decode); if (!skip_event(decode)) { @@ -385,7 +384,8 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread, return NULL; } - vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID); + 
vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, + vcpu_id_str); thread__set_priv(thread, vcpu_record); } @@ -574,7 +574,7 @@ static void show_timeofday(void) static void print_result(struct perf_kvm_stat *kvm) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; struct kvm_event *event; int vcpu = kvm->trace_vcpu; @@ -585,7 +585,7 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\n\n"); print_vcpu_info(kvm); - pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name); + pr_info("%*s ", decode_str_len, kvm->events_ops->name); pr_info("%10s ", "Samples"); pr_info("%9s ", "Samples%"); @@ -604,7 +604,7 @@ static void print_result(struct perf_kvm_stat *kvm) min = get_event_min(event, vcpu); kvm->events_ops->decode_key(kvm, &event->key, decode); - pr_info("%*s ", DECODE_STR_LEN, decode); + pr_info("%*s ", decode_str_len, decode); pr_info("%10llu ", (unsigned long long)ecount); pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100); pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100); @@ -1132,6 +1132,11 @@ exit: _p; \ }) +int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused) +{ + return 0; +} + static int kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) { @@ -1148,7 +1153,14 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) NULL }; const char * const *events_tp; + int ret; + events_tp_size = 0; + ret = setup_kvm_events_tp(kvm); + if (ret < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return ret; + } for (events_tp = kvm_events_tp; *events_tp; events_tp++) events_tp_size++; @@ -1377,6 +1389,12 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * generate the event list */ + err = setup_kvm_events_tp(kvm); + if (err < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return err; + } + kvm->evlist = kvm_live_event_list(); if (kvm->evlist == NULL) { err = -1; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c 
index 319712a4e02b..0ee0d5cd31a7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -49,7 +49,9 @@ struct record { const char *progname; int realtime_prio; bool no_buildid; + bool no_buildid_set; bool no_buildid_cache; + bool no_buildid_cache_set; bool buildid_all; unsigned long long samples; }; @@ -1097,10 +1099,12 @@ struct option __record_options[] = { OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), - OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, - "do not update the buildid cache"), - OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, - "do not collect buildids in perf.data"), + OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, + &record.no_buildid_cache_set, + "do not update the buildid cache"), + OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, + &record.no_buildid_set, + "do not collect buildids in perf.data"), OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2bf537f190a0..1eab50ac1ef6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -75,7 +75,10 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } if (!strcmp(var, "report.percent-limit")) { - rep->min_percent = strtof(value, NULL); + double pcnt = strtof(value, NULL); + + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } if (!strcmp(var, "report.children")) { @@ -504,7 +507,7 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); evlist__for_each(rep->session->evlist, pos) - hists__output_resort(evsel__hists(pos), &prog); + perf_evsel__output_resort(pos, &prog); ui_progress__finish(); } @@ -633,8 +636,10 @@ parse_percent_limit(const struct option 
*opt, const char *str, int unset __maybe_unused) { struct report *rep = opt->value; + double pcnt = strtof(str, NULL); - rep->min_percent = strtof(str, NULL); + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } @@ -907,15 +912,6 @@ repeat: symbol_conf.cumulate_callchain = false; } - if (setup_sorting(session->evlist) < 0) { - if (sort_order) - parse_options_usage(report_usage, options, "s", 1); - if (field_order) - parse_options_usage(sort_order ? NULL : report_usage, - options, "F", 1); - goto error; - } - /* Force tty output for header output and per-thread stat. */ if (report.header || report.header_only || report.show_threads) use_browser = 0; @@ -925,6 +921,15 @@ repeat: else use_browser = 0; + if (setup_sorting(session->evlist) < 0) { + if (sort_order) + parse_options_usage(report_usage, options, "s", 1); + if (field_order) + parse_options_usage(sort_order ? NULL : report_usage, + options, "F", 1); + goto error; + } + if (report.header || report.header_only) { perf_session__fprintf_info(session, stdout, report.show_full_info); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 038e877081b6..15e4fcf34e0c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -735,6 +735,60 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) } } +struct outstate { + FILE *fh; + bool newline; + const char *prefix; +}; + +#define METRIC_LEN 35 + +static void new_line_std(void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct outstate *os) +{ + fputc('\n', os->fh); + fputs(os->prefix, os->fh); + if (stat_config.aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + if (stat_config.aggr_mode == AGGR_CORE) + fprintf(os->fh, " "); + if (stat_config.aggr_mode == AGGR_SOCKET) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate 
*os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); + return; + } + + if (newline) + do_new_line_std(os); + + n = fprintf(out, " # "); + if (color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -793,22 +847,60 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } -static void printout(int id, int nr, struct perf_evsel *counter, double uval) +static void printout(int id, int nr, struct perf_evsel *counter, double uval, + char *prefix, u64 run, u64 ena, double noise) { - int cpu = cpu_map__id_to_cpu(id); + struct perf_stat_output_ctx out; + struct outstate os = { + .fh = stat_config.output, + .prefix = prefix ? prefix : "" + }; + print_metric_t pm = print_metric_std; + void (*nl)(void *); + + nl = new_line_std; + + if (run == 0 || ena == 0) { + aggr_printout(counter, id, nr); + + fprintf(stat_config.output, "%*s%s", + csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + csv_sep); - if (stat_config.aggr_mode == AGGR_GLOBAL) - cpu = 0; + fprintf(stat_config.output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(stat_config.output, "%*s", + csv_output ? 
0 : -25, + perf_evsel__name(counter)); + + if (counter->cgrp) + fprintf(stat_config.output, "%s%s", + csv_sep, counter->cgrp->name); + + print_running(run, ena); + return; + } if (nsec_counter(counter)) nsec_printout(id, nr, counter, uval); else abs_printout(id, nr, counter, uval); - if (!csv_output && !stat_config.interval) - perf_stat__print_shadow_stats(stat_config.output, counter, - uval, cpu, - stat_config.aggr_mode); + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + + if (!csv_output) + perf_stat__print_shadow_stats(counter, uval, + stat_config.aggr_mode == AGGR_GLOBAL ? 0 : + cpu_map__id_to_cpu(id), + &out); + + print_noise(counter, noise); + print_running(run, ena); } static void print_aggr(char *prefix) @@ -839,36 +931,8 @@ static void print_aggr(char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - aggr_printout(counter, id, nr); - - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 
0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; - } uval = val * counter->scale; - printout(id, nr, counter, uval); - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(id, nr, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -895,12 +959,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval); - - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -914,7 +973,6 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) FILE *output = stat_config.output; struct perf_stat_evsel *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); - int scaled = counter->counts->scaled; double uval; double avg_enabled, avg_running; @@ -924,32 +982,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (scaled == -1 || !counter->supported) { - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - fprintf(output, "%*s", - csv_output ? 
0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", csv_sep, counter->cgrp->name); - - print_running(avg_running, avg_enabled); - fputc('\n', output); - return; - } - uval = avg * counter->scale; - printout(-1, 0, counter, uval); - - print_noise(counter, avg); - - print_running(avg_running, avg_enabled); + printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); fprintf(output, "\n"); } @@ -972,36 +1006,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s", - csv_output ? 0 : -4, - perf_evsel__cpus(counter)->map[cpu], csv_sep, - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; - } - uval = val * counter->scale; - printout(cpu, 0, counter, uval); - if (!csv_output) - print_noise(counter, 1.0); - print_running(run, ena); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index bf01cbb0ef23..a75de3940b97 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -252,7 +252,8 @@ static void perf_top__print_sym_table(struct perf_top *top) char bf[160]; int printed = 0; const int win_width = top->winsize.ws_col - 1; - struct hists *hists = evsel__hists(top->sym_evsel); + struct perf_evsel *evsel = top->sym_evsel; + struct hists *hists = evsel__hists(evsel); puts(CONSOLE_CLEAR); @@ -288,7 +289,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + 
perf_evsel__output_resort(evsel, NULL); hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -540,6 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) static void perf_top__sort_new_samples(void *arg) { struct perf_top *t = arg; + struct perf_evsel *evsel = t->sym_evsel; struct hists *hists; perf_top__reset_sample_counters(t); @@ -547,7 +549,7 @@ static void perf_top__sort_new_samples(void *arg) if (t->evlist->selected != NULL) t->sym_evsel = t->evlist->selected; - hists = evsel__hists(t->sym_evsel); + hists = evsel__hists(evsel); if (t->evlist->enabled) { if (t->zero) { @@ -559,7 +561,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); } static void *display_thread_tui(void *arg) @@ -1243,6 +1245,13 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) /* display thread wants entries to be collapsed in a different tree */ sort__need_collapse = 1; + if (top.use_stdio) + use_browser = 0; + else if (top.use_tui) + use_browser = 1; + + setup_browser(false); + if (setup_sorting(top.evlist) < 0) { if (sort_order) parse_options_usage(top_usage, options, "s", 1); @@ -1252,13 +1261,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete_evlist; } - if (top.use_stdio) - use_browser = 0; - else if (top.use_tui) - use_browser = 1; - - setup_browser(false); - status = target__validate(target); if (status) { target__strerror(target, status, errbuf, BUFSIZ); diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 511141b102e8..f7aeaf303f5a 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -61,50 +61,45 @@ endif ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 -else - # - # For linking with debug library, run like: - # - # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ - # - ifdef LIBUNWIND_DIR - 
LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include - LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib - endif - LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) - - # Set per-feature check compilation flags - FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) - FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) endif +# +# For linking with debug library, run like: +# +# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ +# +ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib +endif +LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) + +# Set per-feature check compilation flags +FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif -ifndef NO_LIBELF - # for linking with debug library, run like: - # make DEBUG=1 LIBDW_DIR=/opt/libdw/ - ifdef LIBDW_DIR - LIBDW_CFLAGS := -I$(LIBDW_DIR)/include - LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib - endif - FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) - FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw +# for linking with debug library, run like: +# make DEBUG=1 LIBDW_DIR=/opt/libdw/ +ifdef LIBDW_DIR + LIBDW_CFLAGS := -I$(LIBDW_DIR)/include + LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) +FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw -ifdef LIBBABELTRACE - # for linking with debug library, run like: - # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ - ifdef LIBBABELTRACE_DIR - LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include - LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib - endif - 
FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) - FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +# for linking with debug library, run like: +# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ +ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) +FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi # include ARCH specific config @@ -145,28 +140,26 @@ ifdef PARSER_DEBUG $(call detected_var,PARSER_DEBUG_FLEX) endif -ifndef NO_LIBPYTHON - # Try different combinations to accommodate systems that only have - # python[2][-config] in weird combinations but always preferring - # python2 and python2-config as per pep-0394. If we catch a - # python[-config] in version 3, the version check will kill it. - PYTHON2 := $(if $(call get-executable,python2),python2,python) - override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) - PYTHON2_CONFIG := \ - $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) - override PYTHON_CONFIG := \ - $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) +# Try different combinations to accommodate systems that only have +# python[2][-config] in weird combinations but always preferring +# python2 and python2-config as per pep-0394. If we catch a +# python[-config] in version 3, the version check will kill it. 
+PYTHON2 := $(if $(call get-executable,python2),python2,python) +override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) +PYTHON2_CONFIG := \ + $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) +override PYTHON_CONFIG := \ + $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) - PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) +PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) - FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) -endif +FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) +FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 @@ -411,6 +404,17 @@ ifndef NO_LIBAUDIT endif endif +ifndef NO_LIBCRYPTO + ifneq ($(feature-libcrypto), 1) + msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev); + NO_LIBCRYPTO := 1 + else + CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT + EXTLIBS += -lcrypto + $(call detected,CONFIG_CRYPTO) + endif +endif + ifdef NO_NEWT NO_SLANG=1 endif diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile new file mode 100644 index 000000000000..0277a64b391b --- /dev/null +++ b/tools/perf/jvmti/Makefile @@ -0,0 +1,80 @@ +ARCH=$(shell uname -m) + +ifeq ($(ARCH), x86_64) +JARCH=amd64 +endif +ifeq ($(ARCH), armv7l) +JARCH=armhf +endif 
+ifeq ($(ARCH), armv6l) +JARCH=armhf +endif +ifeq ($(ARCH), aarch64) +JARCH=aarch64 +endif +ifeq ($(ARCH), ppc64) +JARCH=powerpc +endif +ifeq ($(ARCH), ppc64le) +JARCH=powerpc +endif + +DESTDIR=/usr/local + +VERSION=1 +REVISION=0 +AGE=0 + +LN=ln -sf +RM=rm + +SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE) +VLIBJVMTI=libjvmti.so.$(VERSION) +SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI) +SOLIBEXT=so + +# The following works at least on fedora 23, you may need the next +# line for other distros. +ifeq (,$(wildcard /usr/sbin/update-java-alternatives)) +JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') +else +JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +endif + +# -lrt required in 32-bit mode for clock_gettime() +LIBS=-lelf -lrt +INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux + +TARGETS=$(SLIBJVMTI) + +SRCS=libjvmti.c jvmti_agent.c +OBJS=$(SRCS:.c=.o) +SOBJS=$(OBJS:.o=.lo) +OPT=-O2 -g -Werror -Wall + +CFLAGS=$(INCDIR) $(OPT) + +all: $(TARGETS) + +.c.o: + $(CC) $(CFLAGS) -c $*.c +.c.lo: + $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo + +$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h + +$(SLIBJVMTI): $(SOBJS) + $(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LIBS) + $(LN) $@ libjvmti.$(SOLIBEXT) + +clean: + $(RM) -f *.o *.so.* *.so *.lo + +install: + -mkdir -p $(DESTDIR)/lib + install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/ + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI)) + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT)) + ldconfig + +.SUFFIXES: .c .S .o .lo diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c new file mode 100644 index 000000000000..6461e02ab940 --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.c @@ -0,0 +1,465 @@ +/* + * jvmti_agent.c: JVMTI agent interface + * + * Adapted from the Oprofile code in opagent.c: + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU 
Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#include <sys/types.h> +#include <sys/stat.h> /* for mkdir() */ +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <limits.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <sys/mman.h> +#include <syscall.h> /* for gettid() */ +#include <err.h> + +#include "jvmti_agent.h" +#include "../util/jitdump.h" + +#define JIT_LANG "java" + +static char jit_path[PATH_MAX]; +static void *marker_addr; + +/* + * padding buffer + */ +static const char pad_bytes[7]; + +static inline pid_t gettid(void) +{ + return (pid_t)syscall(__NR_gettid); +} + +static int get_e_machine(struct jitheader *hdr) +{ + ssize_t sret; + char id[16]; + int fd, ret = -1; + int m = -1; + struct { + uint16_t e_type; + uint16_t e_machine; + } info; + + fd = open("/proc/self/exe", O_RDONLY); + if (fd == -1) + return -1; + + sret = read(fd, id, sizeof(id)); + if (sret != sizeof(id)) + goto error; + + /* check ELF signature */ + if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') + goto error; + + sret = read(fd, &info, sizeof(info)); + if (sret != sizeof(info)) + goto error; + + m = info.e_machine; + if (m < 0) + m = 0; /* ELF EM_NONE */ + + hdr->elf_mach = m; + ret = 0; 
+error: + close(fd); + return ret; +} + +#define NSEC_PER_SEC 1000000000 +static int perf_clk_id = CLOCK_MONOTONIC; + +static inline uint64_t +timespec_to_ns(const struct timespec *ts) +{ + return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +static inline uint64_t +perf_get_timestamp(void) +{ + struct timespec ts; + int ret; + + ret = clock_gettime(perf_clk_id, &ts); + if (ret) + return 0; + + return timespec_to_ns(&ts); +} + +static int +debug_cache_init(void) +{ + char str[32]; + char *base, *p; + struct tm tm; + time_t t; + int ret; + + time(&t); + localtime_r(&t, &tm); + + base = getenv("JITDUMPDIR"); + if (!base) + base = getenv("HOME"); + if (!base) + base = "."; + + strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); + + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("jvmti: cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); + + p = mkdtemp(jit_path); + if (p != jit_path) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + + return 0; +} + +static int +perf_open_marker_file(int fd) +{ + long pgsz; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return -1; + + /* + * we mmap the jitdump to create an MMAP RECORD in perf.data file. + * The mmap is captured either live (perf record running when we mmap) + * or in deferred mode, via /proc/PID/maps + * the MMAP record is used as a marker of a jitdump file for more meta + * data info about the jitted code. Perf report/annotate detect this + * special filename and process the jitdump file. 
+ * + * mapping must be PROT_EXEC to ensure it is captured by perf record + * even when not using -d option + */ + marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0); + return (marker_addr == MAP_FAILED) ? -1 : 0; +} + +static void +perf_close_marker_file(void) +{ + long pgsz; + + if (!marker_addr) + return; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return; + + munmap(marker_addr, pgsz); +} + +void *jvmti_open(void) +{ + int pad_cnt; + char dump_path[PATH_MAX]; + struct jitheader header; + int fd; + FILE *fp; + + /* + * check if clockid is supported + */ + if (!perf_get_timestamp()) + warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + + memset(&header, 0, sizeof(header)); + + debug_cache_init(); + + /* + * jitdump file name + */ + snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + + fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + return NULL; + + /* + * create perf.data maker for the jitdump file + */ + if (perf_open_marker_file(fd)) { + warnx("jvmti: failed to create marker file"); + return NULL; + } + + fp = fdopen(fd, "w+"); + if (!fp) { + warn("jvmti: cannot create %s", dump_path); + close(fd); + goto error; + } + + warnx("jvmti: jitdump in %s", dump_path); + + if (get_e_machine(&header)) { + warn("get_e_machine failed\n"); + goto error; + } + + header.magic = JITHEADER_MAGIC; + header.version = JITHEADER_VERSION; + header.total_size = sizeof(header); + header.pid = getpid(); + + /* calculate amount of padding '\0' */ + pad_cnt = PADDING_8ALIGNED(header.total_size); + header.total_size += pad_cnt; + + header.timestamp = perf_get_timestamp(); + + if (!fwrite(&header, sizeof(header), 1, fp)) { + warn("jvmti: cannot write dumpfile header"); + goto error; + } + + /* write padding '\0' if necessary */ + if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) { + warn("jvmti: cannot write dumpfile header padding"); + goto error; + } + + return fp; +error: + fclose(fp); + return 
NULL; +} + +int +jvmti_close(void *agent) +{ + struct jr_code_close rec; + FILE *fp = agent; + + if (!fp) { + warnx("jvmti: incalid fd in close_agent"); + return -1; + } + + rec.p.id = JIT_CODE_CLOSE; + rec.p.total_size = sizeof(rec); + + rec.p.timestamp = perf_get_timestamp(); + + if (!fwrite(&rec, sizeof(rec), 1, fp)) + return -1; + + fclose(fp); + + fp = NULL; + + perf_close_marker_file(); + + return 0; +} + +int +jvmti_write_code(void *agent, char const *sym, + uint64_t vma, void const *code, unsigned int const size) +{ + static int code_generation = 1; + struct jr_code_load rec; + size_t sym_len; + size_t padding_count; + FILE *fp = agent; + int ret = -1; + + /* don't care about 0 length function, no samples */ + if (size == 0) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_native_code"); + return -1; + } + + sym_len = strlen(sym) + 1; + + rec.p.id = JIT_CODE_LOAD; + rec.p.total_size = sizeof(rec) + sym_len; + padding_count = PADDING_8ALIGNED(rec.p.total_size); + rec.p. 
total_size += padding_count; + rec.p.timestamp = perf_get_timestamp(); + + rec.code_size = size; + rec.vma = vma; + rec.code_addr = vma; + rec.pid = getpid(); + rec.tid = gettid(); + + if (code) + rec.p.total_size += size; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + /* + * get code index inside lock to avoid race condition + */ + rec.code_index = code_generation++; + + ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + fwrite_unlocked(sym, sym_len, 1, fp); + + if (padding_count) + fwrite_unlocked(pad_bytes, padding_count, 1, fp); + + if (code) + fwrite_unlocked(code, size, 1, fp); + + funlockfile(fp); + + ret = 0; + + return ret; +} + +int +jvmti_write_debug_info(void *agent, uint64_t code, const char *file, + jvmti_line_info_t *li, int nr_lines) +{ + struct jr_code_debug_info rec; + size_t sret, len, size, flen; + size_t padding_count; + uint64_t addr; + const char *fn = file; + FILE *fp = agent; + int i; + + /* + * no entry to write + */ + if (!nr_lines) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_debug_info"); + return -1; + } + + flen = strlen(file) + 1; + + rec.p.id = JIT_CODE_DEBUG_INFO; + size = sizeof(rec); + rec.p.timestamp = perf_get_timestamp(); + rec.code_addr = (uint64_t)(uintptr_t)code; + rec.nr_entry = nr_lines; + + /* + * on disk source line info layout: + * uint64_t : addr + * int : line number + * int : column discriminator + * file[] : source file name + * padding : pad to multiple of 8 bytes + */ + size += nr_lines * sizeof(struct debug_entry); + size += flen * nr_lines; + /* + * pad to 8 bytes + */ + padding_count = PADDING_8ALIGNED(size); + + rec.p.total_size = size + padding_count; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + if (sret != 1) + goto error; + + for (i = 0; i < 
nr_lines; i++) { + + addr = (uint64_t)li[i].pc; + len = sizeof(addr); + sret = fwrite_unlocked(&addr, len, 1, fp); + if (sret != 1) + goto error; + + len = sizeof(li[0].line_number); + sret = fwrite_unlocked(&li[i].line_number, len, 1, fp); + if (sret != 1) + goto error; + + len = sizeof(li[0].discrim); + sret = fwrite_unlocked(&li[i].discrim, len, 1, fp); + if (sret != 1) + goto error; + + sret = fwrite_unlocked(fn, flen, 1, fp); + if (sret != 1) + goto error; + } + if (padding_count) + sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp); + if (sret != 1) + goto error; + + funlockfile(fp); + return 0; +error: + funlockfile(fp); + return -1; +} diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h new file mode 100644 index 000000000000..bedf5d0ba9ff --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.h @@ -0,0 +1,36 @@ +#ifndef __JVMTI_AGENT_H__ +#define __JVMTI_AGENT_H__ + +#include <sys/types.h> +#include <stdint.h> +#include <jvmti.h> + +#define __unused __attribute__((unused)) + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef struct { + unsigned long pc; + int line_number; + int discrim; /* discriminator -- 0 for now */ +} jvmti_line_info_t; + +void *jvmti_open(void); +int jvmti_close(void *agent); +int jvmti_write_code(void *agent, char const *symbol_name, + uint64_t vma, void const *code, + const unsigned int code_size); + +int jvmti_write_debug_info(void *agent, + uint64_t code, + const char *file, + jvmti_line_info_t *li, + int nr_lines); + +#if defined(__cplusplus) +} + +#endif +#endif /* __JVMTI_H__ */ diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c new file mode 100644 index 000000000000..ac12e4b91a92 --- /dev/null +++ b/tools/perf/jvmti/libjvmti.c @@ -0,0 +1,304 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <err.h> +#include <jvmti.h> +#include <jvmticmlr.h> +#include <limits.h> + +#include "jvmti_agent.h" + +static int has_line_numbers; +void 
*jvmti_agent; + +static jvmtiError +do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, + jvmti_line_info_t *tab, jint *nr) +{ + jint i, lines = 0; + jint nr_lines = 0; + jvmtiLineNumberEntry *loc_tab = NULL; + jvmtiError ret; + + ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab); + if (ret != JVMTI_ERROR_NONE) + return ret; + + for (i = 0; i < nr_lines; i++) { + if (loc_tab[i].start_location < bci) { + tab[lines].pc = (unsigned long)pc; + tab[lines].line_number = loc_tab[i].line_number; + tab[lines].discrim = 0; /* not yet used */ + lines++; + } else { + break; + } + } + (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab); + *nr = lines; + return JVMTI_ERROR_NONE; +} + +static jvmtiError +get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines) +{ + const jvmtiCompiledMethodLoadRecordHeader *hdr; + jvmtiCompiledMethodLoadInlineRecord *rec; + jvmtiLineNumberEntry *lne = NULL; + PCStackInfo *c; + jint nr, ret; + int nr_total = 0; + int i, lines_total = 0; + + if (!(tab && nr_lines)) + return JVMTI_ERROR_NULL_POINTER; + + /* + * Phase 1 -- get the number of lines necessary + */ + for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { + if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + /* + * unfortunately, need a tab to get the number of lines! 
+ */ + ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne); + if (ret == JVMTI_ERROR_NONE) { + /* free what was allocated for nothing */ + (*jvmti)->Deallocate(jvmti, (unsigned char *)lne); + nr_total += (int)nr; + } + } + } + } + + if (nr_total == 0) + return JVMTI_ERROR_NOT_FOUND; + + /* + * Phase 2 -- allocate big enough line table + */ + *tab = malloc(nr_total * sizeof(**tab)); + if (!*tab) + return JVMTI_ERROR_OUT_OF_MEMORY; + + for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { + if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + ret = do_get_line_numbers(jvmti, c->pc, + c->methods[0], + c->bcis[0], + *tab + lines_total, + &nr); + if (ret == JVMTI_ERROR_NONE) + lines_total += nr; + } + } + } + *nr_lines = lines_total; + return JVMTI_ERROR_NONE; +} + +static void JNICALL +compiled_method_load_cb(jvmtiEnv *jvmti, + jmethodID method, + jint code_size, + void const *code_addr, + jint map_length, + jvmtiAddrLocationMap const *map, + const void *compile_info) +{ + jvmti_line_info_t *line_tab = NULL; + jclass decl_class; + char *class_sign = NULL; + char *func_name = NULL; + char *func_sign = NULL; + char *file_name= NULL; + char fn[PATH_MAX]; + uint64_t addr = (uint64_t)(uintptr_t)code_addr; + jvmtiError ret; + int nr_lines = 0; /* in line_tab[] */ + size_t len; + + ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, + &decl_class); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get declaring class"); + return; + } + + if (has_line_numbers && map && map_length) { + ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get line table for method"); + nr_lines = 0; + } + } + + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get source filename ret=%d", ret); + goto error; + } + + 
ret = (*jvmti)->GetClassSignature(jvmti, decl_class, + &class_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: getclassignature failed"); + goto error; + } + + ret = (*jvmti)->GetMethodName(jvmti, method, &func_name, + &func_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: failed getmethodname"); + goto error; + } + + /* + * Assume path name is class hierarchy, this is a common practice with Java programs + */ + if (*class_sign == 'L') { + int j, i = 0; + char *p = strrchr(class_sign, '/'); + if (p) { + /* drop the 'L' prefix and copy up to the final '/' */ + for (i = 0; i < (p - class_sign); i++) + fn[i] = class_sign[i+1]; + } + /* + * append file name, we use loops and not string ops to avoid modifying + * class_sign which is used later for the symbol name + */ + for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++) + fn[i] = file_name[j]; + fn[i] = '\0'; + } else { + /* fallback case */ + strcpy(fn, file_name); + } + /* + * write source line info record if we have it + */ + if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) + warnx("jvmti: write_debug_info() failed"); + + len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; + { + char str[len]; + snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign); + + if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size)) + warnx("jvmti: write_code() failed"); + } +error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name); + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); + free(line_tab); +} + +static void JNICALL +code_generated_cb(jvmtiEnv *jvmti, + char const *name, + void const *code_addr, + jint code_size) +{ + uint64_t addr = (uint64_t)(unsigned long)code_addr; + int ret; + + ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size); + if (ret) + warnx("jvmti: 
write_code() failed for code_generated"); +} + +JNIEXPORT jint JNICALL +Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) +{ + jvmtiEventCallbacks cb; + jvmtiCapabilities caps1; + jvmtiJlocationFormat format; + jvmtiEnv *jvmti = NULL; + jint ret; + + jvmti_agent = jvmti_open(); + if (!jvmti_agent) { + warnx("jvmti: open_agent failed"); + return -1; + } + + /* + * Request a JVMTI interface version 1 environment + */ + ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1); + if (ret != JNI_OK) { + warnx("jvmti: jvmti version 1 not supported"); + return -1; + } + + /* + * acquire method_load capability, we require it + * request line numbers (optional) + */ + memset(&caps1, 0, sizeof(caps1)); + caps1.can_generate_compiled_method_load_events = 1; + + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: acquire compiled_method capability failed"); + return -1; + } + ret = (*jvmti)->GetJLocationFormat(jvmti, &format); + if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) { + memset(&caps1, 0, sizeof(caps1)); + caps1.can_get_line_numbers = 1; + caps1.can_get_source_file_name = 1; + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret == JVMTI_ERROR_NONE) + has_line_numbers = 1; + } + + memset(&cb, 0, sizeof(cb)); + + cb.CompiledMethodLoad = compiled_method_load_cb; + cb.DynamicCodeGenerated = code_generated_cb; + + ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb)); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot set event callbacks"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: setnotification failed for method_load"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: setnotification failed on code_generated"); + return 
-1; + } + return 0; +} + +JNIEXPORT void JNICALL +Agent_OnUnload(JavaVM *jvm __unused) +{ + int ret; + + ret = jvmti_close(jvmti_agent); + if (ret) + errx(1, "Error: op_close_agent()"); +} diff --git a/tools/perf/perf.c b/tools/perf/perf.c index a929618b8eb6..144047c396f0 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -613,6 +613,8 @@ int main(int argc, const char **argv) */ pthread__block_sigwinch(); + perf_debug_setup(); + while (1) { static int done_help; int was_alias = run_argv(&argc, &argv); diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 15c8400240fd..1d95009592eb 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -71,7 +71,10 @@ try: except: if not audit_package_warned: audit_package_warned = True - print "Install the audit-libs-python package to get syscall names" + print "Install the audit-libs-python package to get syscall names.\n" \ + "For example:\n # apt-get install python-audit (Ubuntu)" \ + "\n # yum install audit-libs-python (Fedora)" \ + "\n etc.\n" def syscall_name(id): try: diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore index bf016c439fbd..8cc30e731c73 100644 --- a/tools/perf/tests/.gitignore +++ b/tools/perf/tests/.gitignore @@ -1,3 +1,4 @@ llvm-src-base.c llvm-src-kbuild.c llvm-src-prologue.c +llvm-src-relocation.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 614899b88b37..1ba628ed049a 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -31,7 +31,7 @@ perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o -perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o +perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o perf-y += bpf.o perf-y += topology.o perf-y += 
cpumap.o @@ -59,6 +59,13 @@ $(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ $(Q)echo ';' >> $@ +$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build + $(call rule_mkdir) + $(Q)echo '#include <tests/llvm.h>' > $@ + $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@ + $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ + $(Q)echo ';' >> $@ + ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index fb80c9eb6a95..e7664fe3bd33 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -29,14 +29,59 @@ static int fd1; static int fd2; +static int fd3; static int overflows; +static int overflows_2; + +volatile long the_var; + + +/* + * Use ASM to ensure watchpoint and breakpoint can be triggered + * at one instruction. + */ +#if defined (__x86_64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "incq (%rdi)\n" + "ret\n"); +#elif defined (__aarch64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "str x30, [x0]\n" + "ret\n"); + +#else +static void __test_function(volatile long *ptr) +{ + *ptr = 0x1234; +} +#endif __attribute__ ((noinline)) static int test_function(void) { + __test_function(&the_var); + the_var++; return time(NULL); } +static void sig_handler_2(int signum __maybe_unused, + siginfo_t *oh __maybe_unused, + void *uc __maybe_unused) +{ + overflows_2++; + if (overflows_2 > 10) { + ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); + } +} + static void sig_handler(int signum __maybe_unused, siginfo_t *oh __maybe_unused, void *uc __maybe_unused) @@ -54,10 +99,11 @@ static void sig_handler(int 
signum __maybe_unused, */ ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); } } -static int bp_event(void *fn, int setup_signal) +static int __event(bool is_x, void *addr, int sig) { struct perf_event_attr pe; int fd; @@ -67,8 +113,8 @@ static int bp_event(void *fn, int setup_signal) pe.size = sizeof(struct perf_event_attr); pe.config = 0; - pe.bp_type = HW_BREAKPOINT_X; - pe.bp_addr = (unsigned long) fn; + pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W; + pe.bp_addr = (unsigned long) addr; pe.bp_len = sizeof(long); pe.sample_period = 1; @@ -86,17 +132,25 @@ static int bp_event(void *fn, int setup_signal) return TEST_FAIL; } - if (setup_signal) { - fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); - fcntl(fd, F_SETSIG, SIGIO); - fcntl(fd, F_SETOWN, getpid()); - } + fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); + fcntl(fd, F_SETSIG, sig); + fcntl(fd, F_SETOWN, getpid()); ioctl(fd, PERF_EVENT_IOC_RESET, 0); return fd; } +static int bp_event(void *addr, int sig) +{ + return __event(true, addr, sig); +} + +static int wp_event(void *addr, int sig) +{ + return __event(false, addr, sig); +} + static long long bp_count(int fd) { long long count; @@ -114,7 +168,7 @@ static long long bp_count(int fd) int test__bp_signal(int subtest __maybe_unused) { struct sigaction sa; - long long count1, count2; + long long count1, count2, count3; /* setup SIGIO signal handler */ memset(&sa, 0, sizeof(struct sigaction)); @@ -126,21 +180,52 @@ int test__bp_signal(int subtest __maybe_unused) return TEST_FAIL; } + sa.sa_sigaction = (void *) sig_handler_2; + if (sigaction(SIGUSR1, &sa, NULL) < 0) { + pr_debug("failed setting up signal handler 2\n"); + return TEST_FAIL; + } + /* * We create following events: * - * fd1 - breakpoint event on test_function with SIGIO + * fd1 - breakpoint event on __test_function with SIGIO * signal configured. 
We should get signal * notification each time the breakpoint is hit * - * fd2 - breakpoint event on sig_handler without SIGIO + * fd2 - breakpoint event on sig_handler with SIGUSR1 + * configured. We should get SIGUSR1 each time when + * breakpoint is hit + * + * fd3 - watchpoint event on __test_function with SIGIO * configured. * * Following processing should happen: - * - execute test_function - * - fd1 event breakpoint hit -> count1 == 1 - * - SIGIO is delivered -> overflows == 1 - * - fd2 event breakpoint hit -> count2 == 1 + * Exec: Action: Result: + * incq (%rdi) - fd1 event breakpoint hit -> count1 == 1 + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 1 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 1 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 1 + * sys_rt_sigreturn - return from sig_handler + * incq (%rdi) - fd3 event watchpoint hit -> count3 == 1 (wp and bp in one insn) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 2 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 2 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 2 + * sys_rt_sigreturn - return from sig_handler + * the_var++ - fd3 event watchpoint hit -> count3 == 2 (standalone watchpoint) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 3 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 3 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows == 3 + * sys_rt_sigreturn - return from sig_handler * * The test case check following error conditions: * - we get stuck in signal handler because of debug @@ -152,11 +237,13 @@ int test__bp_signal(int subtest __maybe_unused) * */ - fd1 = bp_event(test_function, 1); - fd2 = bp_event(sig_handler, 0); + fd1 = bp_event(__test_function, SIGIO); + fd2 = bp_event(sig_handler, SIGUSR1); + fd3 = 
wp_event((void *)&the_var, SIGIO); ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); /* * Kick off the test by trigering 'fd1' @@ -166,15 +253,18 @@ int test__bp_signal(int subtest __maybe_unused) ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); count1 = bp_count(fd1); count2 = bp_count(fd2); + count3 = bp_count(fd3); close(fd1); close(fd2); + close(fd3); - pr_debug("count1 %lld, count2 %lld, overflow %d\n", - count1, count2, overflows); + pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n", + count1, count2, count3, overflows, overflows_2); if (count1 != 1) { if (count1 == 11) @@ -183,12 +273,18 @@ int test__bp_signal(int subtest __maybe_unused) pr_debug("failed: wrong count for bp1%lld\n", count1); } - if (overflows != 1) + if (overflows != 3) pr_debug("failed: wrong overflow hit\n"); - if (count2 != 1) + if (overflows_2 != 3) + pr_debug("failed: wrong overflow_2 hit\n"); + + if (count2 != 3) pr_debug("failed: wrong count for bp2\n"); - return count1 == 1 && overflows == 1 && count2 == 1 ? + if (count3 != 2) + pr_debug("failed: wrong count for bp3\n"); + + return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ? 
TEST_OK : TEST_FAIL; } diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c new file mode 100644 index 000000000000..93af77421816 --- /dev/null +++ b/tools/perf/tests/bpf-script-test-relocation.c @@ -0,0 +1,50 @@ +/* + * bpf-script-test-relocation.c + * Test BPF loader checking relocation + */ +#ifndef LINUX_VERSION_CODE +# error Need LINUX_VERSION_CODE +# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' +#endif +#define BPF_ANY 0 +#define BPF_MAP_TYPE_ARRAY 2 +#define BPF_FUNC_map_lookup_elem 1 +#define BPF_FUNC_map_update_elem 2 + +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = + (void *) BPF_FUNC_map_update_elem; + +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#define SEC(NAME) __attribute__((section(NAME), used)) +struct bpf_map_def SEC("maps") my_table = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +int this_is_a_global_val; + +SEC("func=sys_write") +int bpf_func__sys_write(void *ctx) +{ + int key = 0; + int value = 0; + + /* + * Incorrect relocation. Should not allow this program be + * loaded into kernel. 
+ */ + bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0); + return 0; +} +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 33689a0cf821..4aed5cb4ac2d 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -1,7 +1,11 @@ #include <stdio.h> #include <sys/epoll.h> +#include <util/util.h> #include <util/bpf-loader.h> #include <util/evlist.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <bpf/bpf.h> #include "tests.h" #include "llvm.h" #include "debug.h" @@ -71,6 +75,15 @@ static struct { (NR_ITERS + 1) / 4, }, #endif + { + LLVM_TESTCASE_BPF_RELOCATION, + "Test BPF relocation checker", + "[bpf_relocation_test]", + "fix 'perf test LLVM' first", + "libbpf error when dealing with relocation", + NULL, + 0, + }, }; static int do_test(struct bpf_object *obj, int (*func)(void), @@ -190,7 +203,7 @@ static int __test__bpf(int idx) ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, bpf_testcase_table[idx].prog_id, - true); + true, NULL); if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { pr_debug("Unable to get BPF object, %s\n", bpf_testcase_table[idx].msg_compile_fail); @@ -202,14 +215,21 @@ static int __test__bpf(int idx) obj = prepare_bpf(obj_buf, obj_buf_sz, bpf_testcase_table[idx].name); - if (!obj) { + if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) { + if (!obj) + pr_debug("Fail to load BPF object: %s\n", + bpf_testcase_table[idx].msg_load_fail); + else + pr_debug("Success unexpectedly: %s\n", + bpf_testcase_table[idx].msg_load_fail); ret = TEST_FAIL; goto out; } - ret = do_test(obj, - bpf_testcase_table[idx].target_func, - bpf_testcase_table[idx].expect_result); + if (obj) + ret = do_test(obj, + bpf_testcase_table[idx].target_func, + bpf_testcase_table[idx].expect_result); out: bpf__clear(); return ret; @@ -227,6 +247,36 @@ const char *test__bpf_subtest_get_desc(int i) return bpf_testcase_table[i].desc; } +static int 
check_env(void) +{ + int err; + unsigned int kver_int; + char license[] = "GPL"; + + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + err = fetch_kernel_version(&kver_int, NULL, 0); + if (err) { + pr_debug("Unable to get kernel version\n"); + return err; + } + + err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, + sizeof(insns) / sizeof(insns[0]), + license, kver_int, NULL, 0); + if (err < 0) { + pr_err("Missing basic BPF support, skip this test: %s\n", + strerror(errno)); + return err; + } + close(err); + + return 0; +} + int test__bpf(int i) { int err; @@ -239,6 +289,9 @@ int test__bpf(int i) return TEST_SKIP; } + if (check_env()) + return TEST_SKIP; + err = __test__bpf(i); return err; } diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 5e6a86e50fb9..ecf136c385d5 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -191,7 +191,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. 
*/ hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(hists_to_evsel(hists), NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 351a42463444..34b945a55d4d 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -145,7 +145,7 @@ int test__hists_filter(int subtest __maybe_unused) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index b231265148d8..23cce67c7e48 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -156,7 +156,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -256,7 +256,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -310,7 +310,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -388,7 +388,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 
2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -491,7 +491,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 06f45c1d4256..70edcdfa5672 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -35,6 +35,7 @@ static int test__bpf_parsing(void *obj_buf __maybe_unused, static struct { const char *source; const char *desc; + bool should_load_fail; } bpf_source_table[__LLVM_TESTCASE_MAX] = { [LLVM_TESTCASE_BASE] = { .source = test_llvm__bpf_base_prog, @@ -48,14 +49,19 @@ static struct { .source = test_llvm__bpf_test_prologue_prog, .desc = "Compile source for BPF prologue generation test", }, + [LLVM_TESTCASE_BPF_RELOCATION] = { + .source = test_llvm__bpf_test_relocation, + .desc = "Compile source for BPF relocation test", + .should_load_fail = true, + }, }; - int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, enum test_llvm__testcase idx, - bool force) + bool force, + bool *should_load_fail) { const char *source; const char *desc; @@ -68,6 +74,8 @@ test_llvm__fetch_bpf_obj(void **p_obj_buf, source = bpf_source_table[idx].source; desc = bpf_source_table[idx].desc; + if (should_load_fail) + *should_load_fail = bpf_source_table[idx].should_load_fail; perf_config(perf_config_cb, NULL); @@ -136,14 +144,15 @@ int test__llvm(int subtest) int ret; void *obj_buf = NULL; size_t obj_buf_sz = 0; + bool should_load_fail = false; if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) return TEST_FAIL; ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, - subtest, false); + subtest, false, &should_load_fail); - if (ret == TEST_OK) { + if (ret == TEST_OK && !should_load_fail) { ret = test__bpf_parsing(obj_buf, obj_buf_sz); if (ret != 
TEST_OK) { pr_debug("Failed to parse test case '%s'\n", diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h index 5150b4d6ef50..0eaa604be99d 100644 --- a/tools/perf/tests/llvm.h +++ b/tools/perf/tests/llvm.h @@ -7,14 +7,17 @@ extern const char test_llvm__bpf_base_prog[]; extern const char test_llvm__bpf_test_kbuild_prog[]; extern const char test_llvm__bpf_test_prologue_prog[]; +extern const char test_llvm__bpf_test_relocation[]; enum test_llvm__testcase { LLVM_TESTCASE_BASE, LLVM_TESTCASE_KBUILD, LLVM_TESTCASE_BPF_PROLOGUE, + LLVM_TESTCASE_BPF_RELOCATION, __LLVM_TESTCASE_MAX, }; int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, - enum test_llvm__testcase index, bool force); + enum test_llvm__testcase index, bool force, + bool *should_load_fail); #endif diff --git a/tools/perf/tests/make b/tools/perf/tests/make index f918015512af..cac15d93aea6 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -15,6 +15,7 @@ else PERF := . PERF_O := $(PERF) O_OPT := +FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O)) ifneq ($(O),) FULL_O := $(shell readlink -f $(O) || echo $(O)) @@ -79,6 +80,7 @@ make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 +make_no_libcrypto := NO_LIBCRYPTO=1 make_tags := tags make_cscope := cscope make_help := help @@ -102,6 +104,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 +make_minimal += NO_LIBCRYPTO=1 # $(run) contains all available tests run := make_pure @@ -110,6 +113,9 @@ run := make_pure # disable features detection ifeq ($(MK),Makefile) run += make_clean_all +MAKE_F := $(MAKE) +else +MAKE_F := $(MAKE) -f $(MK) endif run += make_python_perf_so run += make_debug @@ -260,6 +266,8 @@ run := $(shell shuf 
-e $(run)) run_O := $(shell shuf -e $(run_O)) endif +max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L) + ifdef DEBUG d := $(info run $(run)) d := $(info run_O $(run_O)) @@ -267,13 +275,13 @@ endif MAKEFLAGS := --no-print-directory -clean := @(cd $(PERF); make -s -f $(MK) $(O_OPT) clean >/dev/null) +clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null) $(run): $(call clean) @TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST $($@)"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ $(call test,$@) && \ @@ -283,8 +291,8 @@ $(run_O): $(call clean) @TMP_O=$$(mktemp -d); \ TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ $(call test_O,$@) && \ @@ -313,11 +321,43 @@ make_kernelsrc_tools: (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \ test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false) +FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP +FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC + all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) out: $(run_O) @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) + +ifeq ($(REUSE_FEATURES_DUMP),1) +$(FEATURES_DUMP_FILE): + $(call clean) + @cmd="cd $(PERF) && make 
FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +$(FEATURES_DUMP_FILE_STATIC): + $(call clean) + @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +# Add feature dump dependency for run/run_O targets +$(foreach t,$(run) $(run_O),$(eval \ + $(t): $(if $(findstring make_static,$(t)),\ + $(FEATURES_DUMP_FILE_STATIC),\ + $(FEATURES_DUMP_FILE)))) + +# Append 'FEATURES_DUMP=' option to all test cases. For example: +# make_no_libbpf: NO_LIBBPF=1 --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP +# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC +$(foreach t,$(run),$(if $(findstring make_static,$(t)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE)))) +endif .PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools endif # ifndef MK diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index abe8849d1d70..6648274f4601 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1666,7 +1666,7 @@ static int test_term(struct terms_test *t) } ret = t->check(&terms); - parse_events__free_terms(&terms); + parse_events_terms__purge(&terms); return ret; } diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index f0bfc9e8fd9f..630b0b409b97 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -110,7 +110,6 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) */ for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { struct symbol *pair, *first_pair; - bool backwards = true; sym = rb_entry(nd, struct symbol, rb_node); @@ -151,27 +150,14 @@ next_pair: continue; } else { - 
struct rb_node *nnd; -detour: - nnd = backwards ? rb_prev(&pair->rb_node) : - rb_next(&pair->rb_node); - if (nnd) { - struct symbol *next = rb_entry(nnd, struct symbol, rb_node); - - if (UM(next->start) == mem_start) { - pair = next; + pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL); + if (pair) { + if (UM(pair->start) == mem_start) goto next_pair; - } - } - if (backwards) { - backwards = false; - pair = first_pair; - goto detour; + pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + mem_start, sym->name, pair->name); } - - pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", - mem_start, sym->name, pair->name); } } else pr_debug("%#" PRIx64 ": %s not on kallsyms\n", diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index d37202121689..af68a9d488bf 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -531,8 +531,8 @@ static struct ui_browser_colorset { .bg = "yellow", }, { - .colorset = HE_COLORSET_CODE, - .name = "code", + .colorset = HE_COLORSET_JUMP_ARROWS, + .name = "jump_arrows", .fg = "blue", .bg = "default", }, diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 01781de59532..be3b70eb5fca 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -7,7 +7,7 @@ #define HE_COLORSET_MEDIUM 51 #define HE_COLORSET_NORMAL 52 #define HE_COLORSET_SELECTED 53 -#define HE_COLORSET_CODE 54 +#define HE_COLORSET_JUMP_ARROWS 54 #define HE_COLORSET_ADDR 55 #define HE_COLORSET_ROOT 56 diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 718bd46d47fa..4fc208e82c6f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -284,7 +284,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - ui_browser__set_color(browser, HE_COLORSET_CODE); + ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, 
to); } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 08c09ad755d2..1819771243f9 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -657,9 +657,24 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, return 1; } +static bool check_percent_display(struct rb_node *node, u64 parent_total) +{ + struct callchain_node *child; + + if (node == NULL) + return false; + + if (rb_next(node)) + return true; + + child = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(child) != parent_total; +} + static int hist_browser__show_callchain_flat(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -669,7 +684,7 @@ static int hist_browser__show_callchain_flat(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -763,6 +778,7 @@ static char *hist_browser__folded_callchain_str(struct hist_browser *browser, static int hist_browser__show_callchain_folded(struct hist_browser *browser, struct rb_root *root, unsigned short row, u64 total, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) @@ -772,7 +788,7 @@ static int hist_browser__show_callchain_folded(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -844,20 +860,24 @@ next: return row - first_row; } -static int hist_browser__show_callchain(struct hist_browser *browser, 
+static int hist_browser__show_callchain_graph(struct hist_browser *browser, struct rb_root *root, int level, unsigned short row, u64 total, + u64 parent_total, print_callchain_entry_fn print, struct callchain_print_arg *arg, check_output_full_fn is_output_full) { struct rb_node *node; int first_row = row, offset = level * LEVEL_OFFSET_STEP; - u64 new_total; bool need_percent; + u64 percent_total = total; + + if (callchain_param.mode == CHAIN_GRAPH_REL) + percent_total = parent_total; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -878,7 +898,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser, folded_sign = callchain_list__folded(chain); row += hist_browser__show_callchain_list(browser, child, - chain, row, total, + chain, row, percent_total, was_first && need_percent, offset + extra_offset, print, arg); @@ -893,13 +913,9 @@ static int hist_browser__show_callchain(struct hist_browser *browser, if (folded_sign == '-') { const int new_level = level + (extra_offset ? 
2 : 1); - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = child->children_hit; - else - new_total = total; - - row += hist_browser__show_callchain(browser, &child->rb_root, - new_level, row, new_total, + row += hist_browser__show_callchain_graph(browser, &child->rb_root, + new_level, row, total, + child->children_hit, print, arg, is_output_full); } if (is_output_full(browser, row)) @@ -910,6 +926,45 @@ out: return row - first_row; } +static int hist_browser__show_callchain(struct hist_browser *browser, + struct hist_entry *entry, int level, + unsigned short row, + print_callchain_entry_fn print, + struct callchain_print_arg *arg, + check_output_full_fn is_output_full) +{ + u64 total = hists__total_period(entry->hists); + u64 parent_total; + int printed; + + if (symbol_conf.cumulate_callchain) + parent_total = entry->stat_acc->period; + else + parent_total = entry->stat.period; + + if (callchain_param.mode == CHAIN_FLAT) { + printed = hist_browser__show_callchain_flat(browser, + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); + } else if (callchain_param.mode == CHAIN_FOLDED) { + printed = hist_browser__show_callchain_folded(browser, + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); + } else { + printed = hist_browser__show_callchain_graph(browser, + &entry->sorted_chain, level, row, + total, parent_total, print, arg, + is_output_full); + } + + if (arg->is_current_entry) + browser->he_selection = entry; + + return printed; +} + struct hpp_arg { struct ui_browser *b; char folded_sign; @@ -1006,7 +1061,6 @@ static int hist_browser__show_entry(struct hist_browser *browser, struct hist_entry *entry, unsigned short row) { - char s[256]; int printed = 0; int width = browser->b.width; char folded_sign = ' '; @@ -1031,16 +1085,18 @@ static int hist_browser__show_entry(struct hist_browser *browser, .folded_sign = folded_sign, .current_entry = current_entry, }; - struct perf_hpp hpp = { - .buf = s, - 
.size = sizeof(s), - .ptr = &arg, - }; int column = 0; hist_browser__gotorc(browser, row, 0); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + if (perf_hpp__should_skip(fmt, entry->hists) || column++ < browser->b.horiz_scroll) continue; @@ -1065,11 +1121,18 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (fmt->color) { - width -= fmt->color(fmt, &hpp, entry); + int ret = fmt->color(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + /* + * fmt->color() already used ui_browser to + * print the non alignment bits, skip it (+ret): + */ + ui_browser__printf(&browser->b, "%s", s + ret); } else { - width -= fmt->entry(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry)); ui_browser__printf(&browser->b, "%s", s); } + width -= hpp.buf - s; } /* The scroll bar isn't being used */ @@ -1084,38 +1147,14 @@ static int hist_browser__show_entry(struct hist_browser *browser, --row_offset; if (folded_sign == '-' && row != browser->b.rows) { - u64 total = hists__total_period(entry->hists); struct callchain_print_arg arg = { .row_offset = row_offset, .is_current_entry = current_entry, }; - if (callchain_param.mode == CHAIN_GRAPH_REL) { - if (symbol_conf.cumulate_callchain) - total = entry->stat_acc->period; - else - total = entry->stat.period; - } - - if (callchain_param.mode == CHAIN_FLAT) { - printed += hist_browser__show_callchain_flat(browser, - &entry->sorted_chain, row, total, + printed += hist_browser__show_callchain(browser, entry, 1, row, hist_browser__show_callchain_entry, &arg, hist_browser__check_output_full); - } else if (callchain_param.mode == CHAIN_FOLDED) { - printed += hist_browser__show_callchain_folded(browser, - &entry->sorted_chain, row, total, - hist_browser__show_callchain_entry, &arg, - hist_browser__check_output_full); - } else { - 
printed += hist_browser__show_callchain(browser, - &entry->sorted_chain, 1, row, total, - hist_browser__show_callchain_entry, &arg, - hist_browser__check_output_full); - } - - if (arg.is_current_entry) - browser->he_selection = entry; } return printed; @@ -1144,7 +1183,7 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; @@ -1380,15 +1419,11 @@ do_offset: static int hist_browser__fprintf_callchain(struct hist_browser *browser, struct hist_entry *he, FILE *fp) { - u64 total = hists__total_period(he->hists); struct callchain_print_arg arg = { .fp = fp, }; - if (symbol_conf.cumulate_callchain) - total = he->stat_acc->period; - - hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total, + hist_browser__show_callchain(browser, he, 1, 0, hist_browser__fprintf_callchain_entry, &arg, hist_browser__check_dump_full); return arg.printed; @@ -1414,7 +1449,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) printed += fprintf(fp, "%c ", folded_sign); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -1425,9 +1460,10 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, first = false; ret = fmt->entry(fmt, &hpp, he); + ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret); advance_hpp(&hpp, ret); } - printed += fprintf(fp, "%s\n", rtrim(s)); + printed += fprintf(fp, "%s\n", s); if (folded_sign == '-') printed += hist_browser__fprintf_callchain(browser, he, fp); @@ -1782,7 +1818,7 @@ static int add_thread_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct thread *thread) { - if (thread == NULL) + if (!sort__has_thread || thread == NULL) return 0; if 
(asprintf(optstr, "Zoom %s %s(%d) thread", @@ -1825,7 +1861,7 @@ static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", @@ -1850,7 +1886,7 @@ static int add_map_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Browse map details") < 0) @@ -1971,7 +2007,7 @@ static int add_socket_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, int socket_id) { - if (socket_id < 0) + if (!sort__has_socket || socket_id < 0) return 0; if (asprintf(optstr, "Zoom %s Processor Socket %d", @@ -2002,6 +2038,42 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) hb->nr_non_filtered_entries = nr_entries; } +static void hist_browser__update_percent_limit(struct hist_browser *hb, + double percent) +{ + struct hist_entry *he; + struct rb_node *nd = rb_first(&hb->hists->entries); + u64 total = hists__total_period(hb->hists); + u64 min_callchain_hits = total * (percent / 100); + + hb->min_pcnt = callchain_param.min_percent = percent; + + if (!symbol_conf.use_callchain) + return; + + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { + he = rb_entry(nd, struct hist_entry, rb_node); + + if (callchain_param.mode == CHAIN_GRAPH_REL) { + total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (percent / 100); + } + + callchain_param.sort(&he->sorted_chain, he->callchain, + min_callchain_hits, &callchain_param); + + /* force to re-evaluate folding state of callchains */ + he->init_have_children = false; + hist_entry__set_folding(he, false); + + nd = rb_next(nd); + } +} + static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, const char 
*helpline, bool left_exits, @@ -2037,6 +2109,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "E Expand all callchains\n" \ "F Toggle percentage of filtered entries\n" \ "H Display column headers\n" \ + "L Change percent limit\n" \ "m Display context menu\n" \ "S Zoom into current Processor Socket\n" \ @@ -2077,7 +2150,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { perf_hpp__reset_width(fmt, hists); /* * This is done just once, and activates the horizontal scrolling @@ -2192,6 +2265,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, top->zero = !top->zero; } continue; + case 'L': + if (ui_browser__input_window("Percent Limit", + "Please enter the value you want to hide entries under that percent.", + buf, "ENTER: OK, ESC: Cancel", + delay_secs * 2) == K_ENTER) { + char *end; + double new_percent = strtod(buf, &end); + + if (new_percent < 0 || new_percent > 100) { + ui_browser__warning(&browser->b, delay_secs * 2, + "Invalid percent: %.2f", new_percent); + continue; + } + + hist_browser__update_percent_limit(browser, new_percent); + hist_browser__reset(browser); + } + continue; case K_F1: case 'h': case '?': @@ -2263,10 +2354,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } - if (!sort__has_sym) - goto add_exit_option; - - if (browser->selection == NULL) + if (!sort__has_sym || browser->selection == NULL) goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { @@ -2306,11 +2394,16 @@ skip_annotation: &options[nr_options], socked_id); /* perf script support */ + if (!is_report_browser(hbt)) + goto skip_scripting; + if (browser->he_selection) { - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - thread, NULL); + if 
(sort__has_thread && thread) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + thread, NULL); + } /* * Note that browser->selection != NULL * when browser->he_selection is not NULL, @@ -2320,16 +2413,18 @@ skip_annotation: * * See hist_browser__show_entry. */ - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - NULL, browser->selection->sym); + if (sort__has_sym && browser->selection->sym) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + NULL, browser->selection->sym); + } } nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], NULL, NULL); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); -add_exit_option: +skip_scripting: nr_options += add_exit_opt(browser, &actions[nr_options], &options[nr_options]); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 0f8dcfdfb10f..32cc38a5b57f 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -306,7 +306,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, nr_cols = 0; - perf_hpp__for_each_format(fmt) + hists__for_each_format(hists, fmt) col_types[nr_cols++] = G_TYPE_STRING; store = gtk_tree_store_newv(nr_cols, col_types); @@ -317,7 +317,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -367,7 +367,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, h->hists)) continue; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index bf2a66e254ea..1ba4117d9c2d 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -371,7 +371,20 @@ static int64_t hpp__nop_cmp(struct 
perf_hpp_fmt *fmt __maybe_unused, return 0; } -#define HPP__COLOR_PRINT_FNS(_name, _fn) \ +static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a) +{ + return a->header == hpp__header_fn; +} + +static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b)) + return false; + + return a->idx == b->idx; +} + +#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -381,9 +394,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } -#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn) \ +#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -393,9 +408,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } -#define HPP__PRINT_FNS(_name, _fn) \ +#define HPP__PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -404,22 +421,25 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } struct perf_hpp_fmt perf_hpp__format[] = { - HPP__COLOR_PRINT_FNS("Overhead", overhead), - HPP__COLOR_PRINT_FNS("sys", overhead_sys), - HPP__COLOR_PRINT_FNS("usr", overhead_us), - HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys), - HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us), - HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc), - HPP__PRINT_FNS("Samples", samples), - HPP__PRINT_FNS("Period", period) + HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD), + HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS), + 
HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US), + HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS), + HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US), + HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC), + HPP__PRINT_FNS("Samples", samples, SAMPLES), + HPP__PRINT_FNS("Period", period, PERIOD) }; -LIST_HEAD(perf_hpp__list); -LIST_HEAD(perf_hpp__sort_list); - +struct perf_hpp_list perf_hpp_list = { + .fields = LIST_HEAD_INIT(perf_hpp_list.fields), + .sorts = LIST_HEAD_INIT(perf_hpp_list.sorts), +}; #undef HPP__COLOR_PRINT_FNS #undef HPP__COLOR_ACC_PRINT_FNS @@ -485,63 +505,60 @@ void perf_hpp__init(void) hpp_dimension__add_output(PERF_HPP__PERIOD); } -void perf_hpp__column_register(struct perf_hpp_fmt *format) -{ - list_add_tail(&format->list, &perf_hpp__list); -} - -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) -{ - list_del(&format->list); -} - -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) +void perf_hpp_list__column_register(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) { - list_add_tail(&format->sort_list, &perf_hpp__sort_list); + list_add_tail(&format->list, &list->fields); } -void perf_hpp__column_enable(unsigned col) +void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) { - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_register(&perf_hpp__format[col]); + list_add_tail(&format->sort_list, &list->sorts); } -void perf_hpp__column_disable(unsigned col) +void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_unregister(&perf_hpp__format[col]); + list_del(&format->list); } void perf_hpp__cancel_cumulate(void) { + struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp; + if (is_strict_order(field_order)) return; - perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); - perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead"; + ovh = 
&perf_hpp__format[PERF_HPP__OVERHEAD]; + acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC]; + + perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) { + if (acc->equal(acc, fmt)) { + perf_hpp__column_unregister(fmt); + continue; + } + + if (ovh->equal(ovh, fmt)) + fmt->name = "Overhead"; + } +} + +static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + return a->equal && a->equal(a, b); } -void perf_hpp__setup_output_field(void) +void perf_hpp__setup_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append sort keys to output field */ - perf_hpp__for_each_sort_list(fmt) { - if (!list_empty(&fmt->list)) - continue; - - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. - */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; + perf_hpp_list__for_each_sort_list(list, fmt) { + struct perf_hpp_fmt *pos; - perf_hpp__for_each_format(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) - goto next; - } + perf_hpp_list__for_each_format(list, pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__column_register(fmt); @@ -550,27 +567,17 @@ next: } } -void perf_hpp__append_sort_keys(void) +void perf_hpp__append_sort_keys(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append output fields to sort keys */ - perf_hpp__for_each_format(fmt) { - if (!list_empty(&fmt->sort_list)) - continue; + perf_hpp_list__for_each_format(list, fmt) { + struct perf_hpp_fmt *pos; - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. 
- */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; - - perf_hpp__for_each_sort_list(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) - goto next; - } + perf_hpp_list__for_each_sort_list(list, pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__register_sort_field(fmt); @@ -579,20 +586,29 @@ next: } } -void perf_hpp__reset_output_field(void) + +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + if (fmt->free) + fmt->free(fmt); +} + +void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; /* reset output fields */ - perf_hpp__for_each_format_safe(fmt, tmp) { + perf_hpp_list__for_each_format_safe(list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } /* reset sort keys */ - perf_hpp__for_each_sort_list_safe(fmt, tmp) { + perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } } @@ -606,7 +622,7 @@ unsigned int hists__sort_list_width(struct hists *hists) bool first = true; struct perf_hpp dummy_hpp; - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -626,20 +642,12 @@ unsigned int hists__sort_list_width(struct hists *hists) void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) { - int idx; - if (perf_hpp__is_sort_entry(fmt)) return perf_hpp__reset_sort_width(fmt, hists); - for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { - if (fmt == &perf_hpp__format[idx]) - break; - } - - if (idx == PERF_HPP__MAX_INDEX) - return; + BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX); - switch (idx) { + switch (fmt->idx) { case PERF_HPP__OVERHEAD: case PERF_HPP__OVERHEAD_SYS: case PERF_HPP__OVERHEAD_US: @@ -667,7 +675,7 @@ void perf_hpp__set_user_width(const char *width_list_str) struct perf_hpp_fmt *fmt; const char *ptr = width_list_str; - perf_hpp__for_each_format(fmt) { + 
perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { char *p; int len = strtol(ptr, &p, 10); diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 387110d50b00..87b022ff03d8 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -165,8 +165,28 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, return ret; } +/* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage + * than the hist in graph mode). This also avoid one level of column. + * + * However when percent-limit applied, it's possible that single callchain + * node have different (non-100% in fractal mode) percentage. + */ +static bool need_percent_display(struct rb_node *node, u64 parent_samples) +{ + struct callchain_node *cnode; + + if (rb_next(node)) + return true; + + cnode = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(cnode) != parent_samples; +} + static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int left_margin) + u64 total_samples, u64 parent_samples, + int left_margin) { struct callchain_node *cnode; struct callchain_list *chain; @@ -177,13 +197,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, int ret = 0; char bf[1024]; - /* - * If have one single callchain root, don't bother printing - * its percentage (100 % in fractal mode and the same percentage - * than the hist in graph mode). This also avoid one level of column. 
- */ node = rb_first(root); - if (node && !rb_next(node)) { + if (node && !need_percent_display(node, parent_samples)) { cnode = rb_entry(node, struct callchain_node, rb_node); list_for_each_entry(chain, &cnode->val, list) { /* @@ -213,9 +228,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, root = &cnode->rb_root; } + if (callchain_param.mode == CHAIN_GRAPH_REL) + total_samples = parent_samples; + ret += __callchain__fprintf_graph(fp, root, total_samples, 1, 1, left_margin); - ret += fprintf(fp, "\n"); + if (ret) { + /* do not add a blank line if it printed nothing */ + ret += fprintf(fp, "\n"); + } return ret; } @@ -323,16 +344,19 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, u64 total_samples, int left_margin, FILE *fp) { + u64 parent_samples = he->stat.period; + + if (symbol_conf.cumulate_callchain) + parent_samples = he->stat_acc->period; + switch (callchain_param.mode) { case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, - symbol_conf.cumulate_callchain ? 
- he->stat_acc->period : he->stat.period, - left_margin); + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + parent_samples, left_margin); break; case CHAIN_GRAPH_ABS: return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, - left_margin); + parent_samples, left_margin); break; case CHAIN_FLAT: return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); @@ -349,30 +373,6 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static size_t hist_entry__callchain_fprintf(struct hist_entry *he, - struct hists *hists, - FILE *fp) -{ - int left_margin = 0; - u64 total_period = hists->stats.total_period; - - if (field_order == NULL && (sort_order == NULL || - !prefixcmp(sort_order, "comm"))) { - struct perf_hpp_fmt *fmt; - - perf_hpp__for_each_format(fmt) { - if (!perf_hpp__is_sort_entry(fmt)) - continue; - - /* must be 'comm' sort entry */ - left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists)); - left_margin -= thread__comm_len(he->thread); - break; - } - } - return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); -} - static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) { const char *sep = symbol_conf.field_sep; @@ -384,7 +384,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) if (symbol_conf.exclude_other && !he->parent) return 0; - perf_hpp__for_each_format(fmt) { + hists__for_each_format(he->hists, fmt) { if (perf_hpp__should_skip(fmt, he->hists)) continue; @@ -403,6 +403,7 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) else ret = fmt->entry(fmt, hpp, he); + ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret); advance_hpp(hpp, ret); } @@ -418,6 +419,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, .buf = bf, .size = size, }; + u64 total_period = hists->stats.total_period; if (size == 0 || size > bfsz) size = hpp.size = bfsz; @@ -427,7 +429,7 @@ static int 
hist_entry__fprintf(struct hist_entry *he, size_t size, ret = fprintf(fp, "%s\n", bf); if (symbol_conf.use_callchain) - ret += hist_entry__callchain_fprintf(he, hists, fp); + ret += hist_entry_callchain__fprintf(he, total_period, 0, fp); return ret; } @@ -452,7 +454,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, init_rem_hits(); - perf_hpp__for_each_format(fmt) + hists__for_each_format(hists, fmt) perf_hpp__reset_width(fmt, hists); if (symbol_conf.col_width_list_str) @@ -463,7 +465,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { if (perf_hpp__should_skip(fmt, hists)) continue; @@ -487,7 +489,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { unsigned int i; if (perf_hpp__should_skip(fmt, hists)) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 5eec53a3f4ac..a34752d28488 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -105,8 +105,14 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o +libperf-y += demangle-java.o +libperf-$(CONFIG_LIBELF) += jitdump.o +libperf-$(CONFIG_LIBELF) += genelf.o +libperf-$(CONFIG_LIBELF) += genelf_debug.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +# avoid compiler warnings in 32-bit mode +CFLAGS_genelf_debug.o += -Wno-packed $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(call rule_mkdir) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 360fda01f3b0..ec164fe70718 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -478,10 +478,11 @@ void auxtrace_heap__pop(struct auxtrace_heap *heap) heap_array[last].ordinal); } -size_t auxtrace_record__info_priv_size(struct 
auxtrace_record *itr) +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist) { if (itr) - return itr->info_priv_size(itr); + return itr->info_priv_size(itr, evlist); return 0; } @@ -852,7 +853,7 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, int err; pr_debug2("Synthesizing auxtrace information\n"); - priv_size = auxtrace_record__info_priv_size(itr); + priv_size = auxtrace_record__info_priv_size(itr, session->evlist); ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size); if (!ev) return -ENOMEM; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index b86f90db1352..e5a8e2d4f2af 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -293,7 +293,8 @@ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); - size_t (*info_priv_size)(struct auxtrace_record *itr); + size_t (*info_priv_size)(struct auxtrace_record *itr, + struct perf_evlist *evlist); int (*info_fill)(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -429,7 +430,8 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, int auxtrace_record__options(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); -size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr); +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist); int auxtrace_record__info_fill(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 6a7e273a514a..f1479eeef7da 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -166,6 +166,50 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return build_id__filename(build_id_hex, bf, size); } 
+bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) +{ + char *id_name, *ch; + struct stat sb; + + id_name = dso__build_id_filename(dso, bf, size); + if (!id_name) + goto err; + if (access(id_name, F_OK)) + goto err; + if (lstat(id_name, &sb) == -1) + goto err; + if ((size_t)sb.st_size > size - 1) + goto err; + if (readlink(id_name, bf, size - 1) < 0) + goto err; + + bf[sb.st_size] = '\0'; + + /* + * link should be: + * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92 + */ + ch = strrchr(bf, '/'); + if (!ch) + goto err; + if (ch - 3 < bf) + goto err; + + return strncmp(".ko", ch - 3, 3) == 0; +err: + /* + * If dso__build_id_filename work, get id_name again, + * because id_name points to bf and is broken. + */ + if (id_name) + id_name = dso__build_id_filename(dso, bf, size); + pr_err("Invalid build id: %s\n", id_name ? : + dso->long_name ? : + dso->short_name ? : + "[unknown]"); + return false; +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ @@ -211,6 +255,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd) dsos__for_each_with_build_id(pos, &machine->dsos.head) { const char *name; size_t name_len; + bool in_kernel = false; if (!pos->hit) continue; @@ -227,8 +272,11 @@ static int machine__write_buildid_table(struct machine *machine, int fd) name_len = pos->long_name_len + 1; } + in_kernel = pos->kernel || + is_kernel_module(name, + PERF_RECORD_MISC_CPUMODE_UNKNOWN); err = write_buildid(name, name_len, pos->build_id, machine->pid, - pos->kernel ? kmisc : umisc, fd); + in_kernel ? 
kmisc : umisc, fd); if (err) break; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 27a14a8a945b..64af3e20610d 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -16,6 +16,7 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); +bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 07b5d63947b1..3ca453f0c51f 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -23,6 +23,8 @@ #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" #define PERF_PAGER_ENVIRONMENT "PERF_PAGER" +extern const char *config_exclusive_filename; + typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); extern int perf_config(config_fn_t fn, void *); @@ -31,6 +33,7 @@ extern u64 perf_config_u64(const char *, const char *); extern int perf_config_bool(const char *, const char *); extern int config_error_nonbool(const char *); extern const char *perf_config_dirname(const char *, const char *); +extern const char *perf_etc_perfconfig(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index d3e12e30e1d5..4e727635476e 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -26,7 +26,7 @@ static const char *config_file_name; static int config_linenr; static int config_file_eof; -static const char *config_exclusive_filename; +const char *config_exclusive_filename; static int get_next_char(void) { @@ -434,7 +434,7 @@ static int 
perf_config_from_file(config_fn_t fn, const char *filename, void *dat return ret; } -static const char *perf_etc_perfconfig(void) +const char *perf_etc_perfconfig(void) { static const char *system_wide; if (!system_wide) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index fa935093a599..9bcf2bed3a6d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -8,6 +8,10 @@ #include <linux/bitmap.h> #include "asm/bug.h" +static int max_cpu_num; +static int max_node_num; +static int *cpunode_map; + static struct cpu_map *cpu_map__default_new(void) { struct cpu_map *cpus; @@ -486,6 +490,32 @@ out: pr_err("Failed to read max nodes, using default of %d\n", max_node_num); } +int cpu__max_node(void) +{ + if (unlikely(!max_node_num)) + set_max_node_num(); + + return max_node_num; +} + +int cpu__max_cpu(void) +{ + if (unlikely(!max_cpu_num)) + set_max_cpu_num(); + + return max_cpu_num; +} + +int cpu__get_node(int cpu) +{ + if (unlikely(cpunode_map == NULL)) { + pr_debug("cpu_map not initialized\n"); + return -1; + } + + return cpunode_map[cpu]; +} + static int init_cpunode_map(void) { int i; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 71c41b9efabb..81a2562aaa2b 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -57,37 +57,11 @@ static inline bool cpu_map__empty(const struct cpu_map *map) return map ? 
map->map[0] == -1 : true; } -int max_cpu_num; -int max_node_num; -int *cpunode_map; - int cpu__setup_cpunode_map(void); -static inline int cpu__max_node(void) -{ - if (unlikely(!max_node_num)) - pr_debug("cpu_map not initialized\n"); - - return max_node_num; -} - -static inline int cpu__max_cpu(void) -{ - if (unlikely(!max_cpu_num)) - pr_debug("cpu_map not initialized\n"); - - return max_cpu_num; -} - -static inline int cpu__get_node(int cpu) -{ - if (unlikely(cpunode_map == NULL)) { - pr_debug("cpu_map not initialized\n"); - return -1; - } - - return cpunode_map[cpu]; -} +int cpu__max_node(void); +int cpu__max_cpu(void); +int cpu__get_node(int cpu); int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, int (*f)(struct cpu_map *map, int cpu, void *data), diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 34cd1e4039d3..b722e57d5a87 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -858,6 +858,23 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session) return 0; } +static void cleanup_events(struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + struct evsel_priv *priv; + + priv = evsel->priv; + bt_ctf_event_class_put(priv->event_class); + zfree(&evsel->priv); + } + + perf_evlist__delete(evlist); + session->evlist = NULL; +} + static int setup_streams(struct ctf_writer *cw, struct perf_session *session) { struct ctf_stream **stream; @@ -1171,6 +1188,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force) (double) c.events_size / 1024.0 / 1024.0, c.events_count); + cleanup_events(session); perf_session__delete(session); ctf_writer__cleanup(cw); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 86d9c7302598..ff7e86ad1b06 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -5,6 +5,7 @@ #include <string.h> 
#include <stdarg.h> #include <stdio.h> +#include <api/debug.h> #include "cache.h" #include "color.h" @@ -22,7 +23,7 @@ int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static int _eprintf(int level, int var, const char *fmt, va_list args) +int veprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; @@ -36,24 +37,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } -int veprintf(int level, int var, const char *fmt, va_list args) -{ - return _eprintf(level, var, fmt, args); -} - int eprintf(int level, int var, const char *fmt, ...) { va_list args; int ret; va_start(args, fmt); - ret = _eprintf(level, var, fmt, args); + ret = veprintf(level, var, fmt, args); va_end(args); return ret; } -static int __eprintf_time(u64 t, const char *fmt, va_list args) +static int veprintf_time(u64 t, const char *fmt, va_list args) { int ret = 0; u64 secs, usecs, nsecs = t; @@ -75,7 +71,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) if (var >= level) { va_start(args, fmt); - ret = __eprintf_time(t, fmt, args); + ret = veprintf_time(t, fmt, args); va_end(args); } @@ -91,7 +87,7 @@ void pr_stat(const char *fmt, ...) va_list args; va_start(args, fmt); - _eprintf(1, verbose, fmt, args); + veprintf(1, verbose, fmt, args); va_end(args); eprintf(1, verbose, "\n"); } @@ -192,3 +188,23 @@ int perf_debug_option(const char *str) free(s); return 0; } + +#define DEBUG_WRAPPER(__n, __l) \ +static int pr_ ## __n ## _wrapper(const char *fmt, ...) 
\ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + ret = veprintf(__l, verbose, fmt, args); \ + va_end(args); \ + return ret; \ +} + +DEBUG_WRAPPER(warning, 0); +DEBUG_WRAPPER(debug, 1); + +void perf_debug_setup(void) +{ + libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8b9a088c32ab..14bafda79eda 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__( int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +void perf_debug_setup(void); #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c new file mode 100644 index 000000000000..3e6062ab2cdd --- /dev/null +++ b/tools/perf/util/demangle-java.c @@ -0,0 +1,199 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include "util.h" +#include "debug.h" +#include "symbol.h" + +#include "demangle-java.h" + +enum { + MODE_PREFIX = 0, + MODE_CLASS = 1, + MODE_FUNC = 2, + MODE_TYPE = 3, + MODE_CTYPE = 3, /* class arg */ +}; + +#define BASE_ENT(c, n) [c - 'A']=n +static const char *base_types['Z' - 'A' + 1] = { + BASE_ENT('B', "byte" ), + BASE_ENT('C', "char" ), + BASE_ENT('D', "double" ), + BASE_ENT('F', "float" ), + BASE_ENT('I', "int" ), + BASE_ENT('J', "long" ), + BASE_ENT('S', "short" ), + BASE_ENT('Z', "bool" ), +}; + +/* + * demangle Java symbol between str and end positions and stores + * up to maxlen characters into buf. The parser starts in mode. 
+ * + * Use MODE_PREFIX to process entire prototype till end position + * Use MODE_TYPE to process return type if str starts on return type char + * + * Return: + * success: buf + * error : NULL + */ +static char * +__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode) +{ + int rlen = 0; + int array = 0; + int narg = 0; + const char *q; + + if (!end) + end = str + strlen(str); + + for (q = str; q != end; q++) { + + if (rlen == (maxlen - 1)) + break; + + switch (*q) { + case 'L': + if (mode == MODE_PREFIX || mode == MODE_CTYPE) { + if (mode == MODE_CTYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + narg++; + } + rlen += scnprintf(buf + rlen, maxlen - rlen, "class "); + if (mode == MODE_PREFIX) + mode = MODE_CLASS; + } else + buf[rlen++] = *q; + break; + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': + if (mode == MODE_TYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + narg++; + } else + buf[rlen++] = *q; + break; + case 'V': + if (mode == MODE_TYPE) { + rlen += scnprintf(buf + rlen, maxlen - rlen, "void"); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + } else + buf[rlen++] = *q; + break; + case '[': + if (mode != MODE_TYPE) + goto error; + array++; + break; + case '(': + if (mode != MODE_FUNC) + goto error; + buf[rlen++] = *q; + mode = MODE_TYPE; + break; + case ')': + if (mode != MODE_TYPE) + goto error; + buf[rlen++] = *q; + narg = 0; + break; + case ';': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + /* safe because at least one other char to process */ + if (isalpha(*(q + 1))) + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + if (mode == MODE_CLASS) + mode = MODE_FUNC; + else if (mode == MODE_CTYPE) + mode = 
MODE_TYPE; + break; + case '/': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + break; + default : + buf[rlen++] = *q; + } + } + buf[rlen] = '\0'; + return buf; +error: + return NULL; +} + +/* + * Demangle Java function signature (openJDK, not GCJ) + * input: + * str: string to parse. String is not modified + * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling + * return: + * if input can be demangled, then a newly allocated string is returned. + * if input cannot be demangled, then NULL is returned + * + * Note: caller is responsible for freeing demangled string + */ +char * +java_demangle_sym(const char *str, int flags) +{ + char *buf, *ptr; + char *p; + size_t len, l1 = 0; + + if (!str) + return NULL; + + /* find start of retunr type */ + p = strrchr(str, ')'); + if (!p) + return NULL; + + /* + * expansion factor estimated to 3x + */ + len = strlen(str) * 3 + 1; + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + if (!(flags & JAVA_DEMANGLE_NORET)) { + /* + * get return type first + */ + ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE); + if (!ptr) + goto error; + + /* add space between return type and function prototype */ + l1 = strlen(buf); + buf[l1++] = ' '; + } + + /* process function up to return type */ + ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX); + if (!ptr) + goto error; + + return buf; +error: + free(buf); + return NULL; +} diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h new file mode 100644 index 000000000000..a981c1f968fe --- /dev/null +++ b/tools/perf/util/demangle-java.h @@ -0,0 +1,10 @@ +#ifndef __PERF_DEMANGLE_JAVA +#define __PERF_DEMANGLE_JAVA 1 +/* + * demangle function flags + */ +#define JAVA_DEMANGLE_NORET 0x1 /* do not process return type */ + +char * java_demangle_sym(const char *str, int flags); + +#endif /* __PERF_DEMANGLE_JAVA */ diff --git a/tools/perf/util/dso.c 
b/tools/perf/util/dso.c index e8e9a9dbf5e3..8e6395439ca0 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -52,6 +52,11 @@ int dso__read_binary_type_filename(const struct dso *dso, debuglink--; if (*debuglink == '/') debuglink++; + + ret = -1; + if (!is_regular_file(filename)) + break; + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 7dd5939dea2e..49a11d9d8b8f 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -6,6 +6,8 @@ struct perf_env perf_env; void perf_env__exit(struct perf_env *env) { + int i; + zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); @@ -19,6 +21,10 @@ void perf_env__exit(struct perf_env *env) zfree(&env->numa_nodes); zfree(&env->pmu_mappings); zfree(&env->cpu); + + for (i = 0; i < env->caches_cnt; i++) + cpu_cache_level__free(&env->caches[i]); + zfree(&env->caches); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) @@ -75,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) env->nr_cpus_avail = nr_cpus; return 0; } + +void cpu_cache_level__free(struct cpu_cache_level *cache) +{ + free(cache->type); + free(cache->map); + free(cache->size); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 0132b9557c02..56cffb60a0b4 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -1,11 +1,23 @@ #ifndef __PERF_ENV_H #define __PERF_ENV_H +#include <linux/types.h> + struct cpu_topology_map { int socket_id; int core_id; }; +struct cpu_cache_level { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + char *type; + char *size; + char *map; +}; + struct perf_env { char *hostname; char *os_release; @@ -31,6 +43,8 @@ struct perf_env { char *numa_nodes; char *pmu_mappings; struct cpu_topology_map *cpu; + struct cpu_cache_level *caches; + int caches_cnt; }; extern struct perf_env perf_env; @@ -41,4 +55,5 @@ int 
perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); +void cpu_cache_level__free(struct cpu_cache_level *cache); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 85155e91b61b..7bad5c3fa7b7 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -282,7 +282,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n", &event->mmap2.start, &event->mmap2.len, prot, &event->mmap2.pgoff, &event->mmap2.maj, &event->mmap2.min, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cdbaf9b51e42..467808680ee4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2362,12 +2362,15 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, case EPERM: case EACCES: return scnprintf(msg, size, - "You may not have permission to collect %sstats.\n" - "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n" - " -1 - Not paranoid at all\n" - " 0 - Disallow raw tracepoint access for unpriv\n" - " 1 - Disallow cpu events for unpriv\n" - " 2 - Disallow kernel profiling for unpriv", + "You may not have permission to collect %sstats.\n\n" + "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" + "which controls use of the performance events system by\n" + "unprivileged users (without CAP_SYS_ADMIN).\n\n" + "The default value is 1:\n\n" + " -1: Allow use of (almost) all events by all users\n" + ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" + ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" + ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN", target->system_wide ? 
"system-wide " : ""); case ENOENT: return scnprintf(msg, size, "The %s event is not supported.", diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c new file mode 100644 index 000000000000..c1ef805c6a8f --- /dev/null +++ b/tools/perf/util/genelf.c @@ -0,0 +1,449 @@ +/* + * genelf.c + * Copyright (C) 2014, Google, Inc + * + * Contributed by: + * Stephane Eranian <eranian@gmail.com> + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include <sys/types.h> +#include <stdio.h> +#include <getopt.h> +#include <stddef.h> +#include <libelf.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> +#include <fcntl.h> +#include <err.h> +#include <dwarf.h> + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define JVMTI + +#define BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef HAVE_LIBCRYPTO + +#define BUILD_ID_MD5 +#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ +#undef BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef BUILD_ID_SHA +#include <openssl/sha.h> +#endif + +#ifdef BUILD_ID_MD5 +#include <openssl/md5.h> +#endif +#endif + + +typedef struct { + unsigned int namesz; /* Size of entry's owner string */ + unsigned int descsz; /* Size of the note descriptor */ + unsigned int type; /* Interpretation of the descriptor */ + char name[0]; /* Start of the name+desc data */ +} Elf_Note; + +struct options { + char *output; + int fd; +}; + +static char shd_string_table[] = { + 0, + '.', 't', 'e', 'x', 't', 0, /* 1 */ + '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /* 7 */ + '.', 's', 'y', 'm', 't', 'a', 'b', 0, /* 17 */ + '.', 's', 't', 'r', 't', 'a', 'b', 0, /* 25 */ + '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */ + '.', 'd', 'e', 'b', 'u', 
'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */ +}; + +static struct buildid_note { + Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */ + char name[4]; /* GNU\0 */ + char build_id[20]; +} bnote; + +static Elf_Sym symtab[]={ + /* symbol 0 MUST be the undefined symbol */ + { .st_name = 0, /* index in sym_string table */ + .st_info = ELF_ST_TYPE(STT_NOTYPE), + .st_shndx = 0, /* for now */ + .st_value = 0x0, + .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, + }, + { .st_name = 1, /* index in sym_string table */ + .st_info = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC), + .st_shndx = 1, + .st_value = 0, /* for now */ + .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, /* for now */ + } +}; + +#ifdef BUILD_ID_URANDOM +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code __maybe_unused, + size_t csize __maybe_unused) +{ + int fd; + size_t sz = sizeof(note->build_id); + ssize_t sret; + + fd = open("/dev/urandom", O_RDONLY); + if (fd == -1) + err(1, "cannot access /dev/urandom for builid"); + + sret = read(fd, note->build_id, sz); + + close(fd); + + if (sret != (ssize_t)sz) + memset(note->build_id, 0, sz); +} +#endif + +#ifdef BUILD_ID_SHA +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code, + size_t csize) +{ + if (sizeof(note->build_id) < SHA_DIGEST_LENGTH) + errx(1, "build_id too small for SHA1"); + + SHA1(code, csize, (unsigned char *)note->build_id); +} +#endif + +#ifdef BUILD_ID_MD5 +static void +gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize) +{ + MD5_CTX context; + + if (sizeof(note->build_id) < 16) + errx(1, "build_id too small for MD5"); + + MD5_Init(&context); + MD5_Update(&context, &load_addr, sizeof(load_addr)); + MD5_Update(&context, code, csize); + MD5_Final((unsigned char *)note->build_id, &context); +} +#endif + +/* + * fd: file descriptor open for 
writing for the output file + * load_addr: code load address (could be zero, just used for buildid) + * sym: function name (for native code - used as the symbol) + * code: the native code + * csize: the code size in bytes + */ +int +jit_write_elf(int fd, uint64_t load_addr, const char *sym, + const void *code, int csize, + void *debug, int nr_debug_entries) +{ + Elf *e; + Elf_Data *d; + Elf_Scn *scn; + Elf_Ehdr *ehdr; + Elf_Shdr *shdr; + char *strsym = NULL; + int symlen; + int retval = -1; + + if (elf_version(EV_CURRENT) == EV_NONE) { + warnx("ELF initialization failed"); + return -1; + } + + e = elf_begin(fd, ELF_C_WRITE, NULL); + if (!e) { + warnx("elf_begin failed"); + goto error; + } + + /* + * setup ELF header + */ + ehdr = elf_newehdr(e); + if (!ehdr) { + warnx("cannot get ehdr"); + goto error; + } + + ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN; + ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS; + ehdr->e_machine = GEN_ELF_ARCH; + ehdr->e_type = ET_DYN; + ehdr->e_entry = GEN_ELF_TEXT_OFFSET; + ehdr->e_version = EV_CURRENT; + ehdr->e_shstrndx= 2; /* shdr index for section name */ + + /* + * setup text section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 16; + d->d_off = 0LL; + d->d_buf = (void *)code; + d->d_type = ELF_T_BYTE; + d->d_size = csize; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 1; + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = GEN_ELF_TEXT_OFFSET; + shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + shdr->sh_entsize = 0; + + /* + * setup section headers string table + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = 
shd_string_table; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(shd_string_table); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup symtab section + */ + symtab[1].st_size = csize; + symtab[1].st_value = GEN_ELF_TEXT_OFFSET; + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 8; + d->d_off = 0LL; + d->d_buf = symtab; + d->d_type = ELF_T_SYM; + d->d_size = sizeof(symtab); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */ + shdr->sh_type = SHT_SYMTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = sizeof(Elf_Sym); + shdr->sh_link = 4; /* index of .strtab section */ + + /* + * setup symbols string table + * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry + */ + symlen = 2 + strlen(sym); + strsym = calloc(1, symlen); + if (!strsym) { + warnx("cannot allocate strsym"); + goto error; + } + strcpy(strsym + 1, sym); + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = strsym; + d->d_type = ELF_T_BYTE; + d->d_size = symlen; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 25; /* offset in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup build-id section + */ + scn = elf_newscn(e); + if (!scn) { 
+ warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + /* + * build-id generation + */ + gen_build_id(&bnote, load_addr, code, csize); + bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ + bnote.desc.descsz = sizeof(bnote.build_id); + bnote.desc.type = NT_GNU_BUILD_ID; + strcpy(bnote.name, "GNU"); + + d->d_align = 4; + d->d_off = 0LL; + d->d_buf = &bnote; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(bnote); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 33; /* offset in shd_string_table */ + shdr->sh_type = SHT_NOTE; + shdr->sh_addr = 0x0; + shdr->sh_flags = SHF_ALLOC; + shdr->sh_size = sizeof(bnote); + shdr->sh_entsize = 0; + + if (debug && nr_debug_entries) { + retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); + if (retval) + goto error; + } else { + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update 4 failed"); + goto error; + } + } + + retval = 0; +error: + (void)elf_end(e); + + free(strsym); + + + return retval; +} + +#ifndef JVMTI + +static unsigned char x86_code[] = { + 0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */ + 0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */ + 0xCD, 0x80 /* int $0x80 */ +}; + +static struct options options; + +int main(int argc, char **argv) +{ + int c, fd, ret; + + while ((c = getopt(argc, argv, "o:h")) != -1) { + switch (c) { + case 'o': + options.output = optarg; + break; + case 'h': + printf("Usage: genelf -o output_file [-h]\n"); + return 0; + default: + errx(1, "unknown option"); + } + } + + fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + err(1, "cannot create file %s", options.output); + + ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code)); + close(fd); + + if (ret != 0) + unlink(options.output); + + return ret; +} +#endif diff --git 
a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h new file mode 100644 index 000000000000..45bf9c6d3257 --- /dev/null +++ b/tools/perf/util/genelf.h @@ -0,0 +1,67 @@ +#ifndef __GENELF_H__ +#define __GENELF_H__ + +/* genelf.c */ +extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym, + const void *code, int csize, + void *debug, int nr_debug_entries); +/* genelf_debug.c */ +extern int jit_add_debug_info(Elf *e, uint64_t code_addr, + void *debug, int nr_debug_entries); + +#if defined(__arm__) +#define GEN_ELF_ARCH EM_ARM +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__aarch64__) +#define GEN_ELF_ARCH EM_AARCH64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__x86_64__) +#define GEN_ELF_ARCH EM_X86_64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__i386__) +#define GEN_ELF_ARCH EM_386 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__ppcle__) +#define GEN_ELF_ARCH EM_PPC +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpc__) +#define GEN_ELF_ARCH EM_PPC64 +#define GEN_ELF_ENDIAN ELFDATA2MSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpcle__) +#define GEN_ELF_ARCH EM_PPC64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#else +#error "unsupported architecture" +#endif + +#if GEN_ELF_CLASS == ELFCLASS64 +#define elf_newehdr elf64_newehdr +#define elf_getshdr elf64_getshdr +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF64_ST_BIND(a) +#define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a) +#else +#define elf_newehdr elf32_newehdr +#define elf_getshdr elf32_getshdr +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF32_ST_BIND(a) 
+#define ELF_ST_VIS(a) ELF32_ST_VISIBILITY(a) +#endif + +/* The .text section is directly after the ELF header */ +#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr) + +#endif diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c new file mode 100644 index 000000000000..5980f7d256b1 --- /dev/null +++ b/tools/perf/util/genelf_debug.c @@ -0,0 +1,610 @@ +/* + * genelf_debug.c + * Copyright (C) 2015, Google, Inc + * + * Contributed by: + * Stephane Eranian <eranian@google.com> + * + * Released under the GPL v2. + * + * based on GPLv2 source code from Oprofile + * @remark Copyright 2007 OProfile authors + * @author Philippe Elie + */ +#include <sys/types.h> +#include <stdio.h> +#include <getopt.h> +#include <stddef.h> +#include <libelf.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> +#include <fcntl.h> +#include <err.h> +#include <dwarf.h> + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define BUFFER_EXT_DFL_SIZE (4 * 1024) + +typedef uint32_t uword; +typedef uint16_t uhalf; +typedef int32_t sword; +typedef int16_t shalf; +typedef uint8_t ubyte; +typedef int8_t sbyte; + +struct buffer_ext { + size_t cur_pos; + size_t max_sz; + void *data; +}; + +static void +buffer_ext_dump(struct buffer_ext *be, const char *msg) +{ + size_t i; + warnx("DUMP for %s", msg); + for (i = 0 ; i < be->cur_pos; i++) + warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff); +} + +static inline int +buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz) +{ + void *tmp; + size_t be_sz = be->max_sz; + +retry: + if ((be->cur_pos + sz) < be_sz) { + memcpy(be->data + be->cur_pos, addr, sz); + be->cur_pos += sz; + return 0; + } + + if (!be_sz) + be_sz = BUFFER_EXT_DFL_SIZE; + else + be_sz <<= 1; + + tmp = realloc(be->data, be_sz); + if (!tmp) + return -1; + + be->data = tmp; + be->max_sz = be_sz; + + goto retry; +} + +static void +buffer_ext_init(struct buffer_ext *be) +{ + be->data = NULL; + be->cur_pos = 0; 
+ be->max_sz = 0; +} + +static inline size_t +buffer_ext_size(struct buffer_ext *be) +{ + return be->cur_pos; +} + +static inline void * +buffer_ext_addr(struct buffer_ext *be) +{ + return be->data; +} + +struct debug_line_header { + // Not counting this field + uword total_length; + // version number (2 currently) + uhalf version; + // relative offset from next field to + // program statement + uword prolog_length; + ubyte minimum_instruction_length; + ubyte default_is_stmt; + // line_base - see DWARF 2 specs + sbyte line_base; + // line_range - see DWARF 2 specs + ubyte line_range; + // number of opcode + 1 + ubyte opcode_base; + /* follow the array of opcode args nr: ubytes [nr_opcode_base] */ + /* follow the search directories index, zero terminated string + * terminated by an empty string. + */ + /* follow an array of { filename, LEB128, LEB128, LEB128 }, first is + * the directory index entry, 0 means current directory, then mtime + * and filesize, last entry is followed by an empty string. + */ + /* follow the first program statement */ +} __attribute__((packed)); + +/* DWARF 2 spec talk only about one possible compilation unit header while + * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not + * related to the used arch, an ELF 32 can hold more than 4 GB of debug + * information. For now we handle only DWARF 2 32 bits comp unit. It'll only + * become a problem if we generate more than 4GB of debug information.
+ */ +struct compilation_unit_header { + uword total_length; + uhalf version; + uword debug_abbrev_offset; + ubyte pointer_size; +} __attribute__((packed)); + +#define DW_LNS_num_opcode (DW_LNS_set_isa + 1) + +/* field filled at run time are marked with -1 */ +static struct debug_line_header const default_debug_line_header = { + .total_length = -1, + .version = 2, + .prolog_length = -1, + .minimum_instruction_length = 1, /* could be better when min instruction size != 1 */ + .default_is_stmt = 1, /* we don't take care about basic block */ + .line_base = -5, /* sensible value for line base ... */ + .line_range = -14, /* ... and line range are guessed statically */ + .opcode_base = DW_LNS_num_opcode +}; + +static ubyte standard_opcode_length[] = +{ + 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 +}; +#if 0 +{ + [DW_LNS_advance_pc] = 1, + [DW_LNS_advance_line] = 1, + [DW_LNS_set_file] = 1, + [DW_LNS_set_column] = 1, + [DW_LNS_fixed_advance_pc] = 1, + [DW_LNS_set_isa] = 1, +}; +#endif + +/* field filled at run time are marked with -1 */ +static struct compilation_unit_header default_comp_unit_header = { + .total_length = -1, + .version = 2, + .debug_abbrev_offset = 0, /* we reuse the same abbrev entries for all comp unit */ + .pointer_size = sizeof(void *) +}; + +static void emit_uword(struct buffer_ext *be, uword data) +{ + buffer_ext_add(be, &data, sizeof(uword)); +} + +static void emit_string(struct buffer_ext *be, const char *s) +{ + buffer_ext_add(be, (void *)s, strlen(s) + 1); +} + +static void emit_unsigned_LEB128(struct buffer_ext *be, + unsigned long data) +{ + do { + ubyte cur = data & 0x7F; + data >>= 7; + if (data) + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } while (data); +} + +static void emit_signed_LEB128(struct buffer_ext *be, long data) +{ + int more = 1; + int negative = data < 0; + int size = sizeof(long) * CHAR_BIT; + while (more) { + ubyte cur = data & 0x7F; + data >>= 7; + if (negative) + data |= - (1L << (size - 7)); /* shift at long width: (size - 7) can exceed the width of int */ + if ((data == 0 && !(cur & 0x40))
|| + (data == -1l && (cur & 0x40))) + more = 0; + else + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } +} + +static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode, + void *data, size_t data_len) +{ + buffer_ext_add(be, (char *)"", 1); + + emit_unsigned_LEB128(be, data_len + 1); + + buffer_ext_add(be, &opcode, 1); + buffer_ext_add(be, data, data_len); +} + +static void emit_opcode(struct buffer_ext *be, ubyte opcode) +{ + buffer_ext_add(be, &opcode, 1); +} + +static void emit_opcode_signed(struct buffer_ext *be, + ubyte opcode, long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_signed_LEB128(be, data); +} + +static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode, + unsigned long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_unsigned_LEB128(be, data); +} + +static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc) +{ + emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc); +} + +static void emit_advance_lineno(struct buffer_ext *be, long delta_lineno) +{ + emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno); +} + +static void emit_lne_end_of_sequence(struct buffer_ext *be) +{ + emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0); +} + +static void emit_set_file(struct buffer_ext *be, unsigned long idx) +{ + emit_opcode_unsigned(be, DW_LNS_set_file, idx); +} + +static void emit_lne_define_filename(struct buffer_ext *be, + const char *filename) +{ + buffer_ext_add(be, (void *)"", 1); + + /* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */ + emit_unsigned_LEB128(be, strlen(filename) + 5); + emit_opcode(be, DW_LNE_define_file); + emit_string(be, filename); + /* directory index 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* last modification date on file 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* filesize 0=do not know */ + emit_unsigned_LEB128(be, 0); +} + +static void emit_lne_set_address(struct buffer_ext *be, + void *address) +{ + 
emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long)); +} + +static ubyte get_special_opcode(struct debug_entry *ent, + unsigned int last_line, + unsigned long last_vma) +{ + unsigned int temp; + unsigned long delta_addr; + + /* + * delta from line_base + */ + temp = (ent->lineno - last_line) - default_debug_line_header.line_base; + + if (temp >= default_debug_line_header.line_range) + return 0; + + /* + * delta of addresses + */ + delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length; + + /* This is not sufficient to ensure opcode will be in [0-256] but + * sufficient to ensure when summing with the delta lineno we will + * not overflow the unsigned long opcode */ + + if (delta_addr <= 256 / default_debug_line_header.line_range) { + unsigned long opcode = temp + + (delta_addr * default_debug_line_header.line_range) + + default_debug_line_header.opcode_base; + + return opcode <= 255 ? opcode : 0; + } + return 0; +} + +static void emit_lineno_info(struct buffer_ext *be, + struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + size_t i; + + /* + * Machine state at start of a statement program + * address = 0 + * file = 1 + * line = 1 + * column = 0 + * is_stmt = default_is_stmt as given in the debug_line_header + * basic block = 0 + * end sequence = 0 + */ + + /* start state of the state machine we take care of */ + unsigned long last_vma = code_addr; + char const *cur_filename = NULL; + unsigned long cur_file_idx = 0; + int last_line = 1; + + emit_lne_set_address(be, (void *)code_addr); + + for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) { + int need_copy = 0; + ubyte special_opcode; + + /* + * check if filename changed, if so add it + */ + if (!cur_filename || strcmp(cur_filename, ent->name)) { + emit_lne_define_filename(be, ent->name); + cur_filename = ent->name; + emit_set_file(be, ++cur_file_idx); + need_copy = 1; + } + + special_opcode = get_special_opcode(ent, 
last_line, last_vma); + if (special_opcode != 0) { + last_line = ent->lineno; + last_vma = ent->addr; + emit_opcode(be, special_opcode); + } else { + /* + * lines differ, emit line delta + */ + if (last_line != ent->lineno) { + emit_advance_lineno(be, ent->lineno - last_line); + last_line = ent->lineno; + need_copy = 1; + } + /* + * addresses differ, emit address delta + */ + if (last_vma != ent->addr) { + emit_advance_pc(be, ent->addr - last_vma); + last_vma = ent->addr; + need_copy = 1; + } + /* + * add new row to matrix + */ + if (need_copy) + emit_opcode(be, DW_LNS_copy); + } + } +} + +static void add_debug_line(struct buffer_ext *be, + struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + struct debug_line_header * dbg_header; + size_t old_size; + + old_size = buffer_ext_size(be); + + buffer_ext_add(be, (void *)&default_debug_line_header, + sizeof(default_debug_line_header)); + + buffer_ext_add(be, &standard_opcode_length, sizeof(standard_opcode_length)); + + // empty directory entry + buffer_ext_add(be, (void *)"", 1); + + // empty filename directory + buffer_ext_add(be, (void *)"", 1); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->prolog_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, minimum_instruction_length); + + emit_lineno_info(be, ent, nr_entry, code_addr); + + emit_lne_end_of_sequence(be); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, version); +} + +static void +add_debug_abbrev(struct buffer_ext *be) +{ + emit_unsigned_LEB128(be, 1); + emit_unsigned_LEB128(be, DW_TAG_compile_unit); + emit_unsigned_LEB128(be, DW_CHILDREN_yes); + emit_unsigned_LEB128(be, DW_AT_stmt_list); + emit_unsigned_LEB128(be, DW_FORM_data4); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); +} + +static void +add_compilation_unit(struct buffer_ext *be, + size_t 
offset_debug_line) +{ + struct compilation_unit_header *comp_unit_header; + size_t old_size = buffer_ext_size(be); + + buffer_ext_add(be, &default_comp_unit_header, + sizeof(default_comp_unit_header)); + + emit_unsigned_LEB128(be, 1); + emit_uword(be, offset_debug_line); + + comp_unit_header = buffer_ext_addr(be) + old_size; + comp_unit_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct compilation_unit_header, version); +} + +static int +jit_process_debug_info(uint64_t code_addr, + void *debug, int nr_debug_entries, + struct buffer_ext *dl, + struct buffer_ext *da, + struct buffer_ext *di) +{ + struct debug_entry *ent = debug; + int i; + + for (i = 0; i < nr_debug_entries; i++) { + ent->addr = ent->addr - code_addr; + ent = debug_entry_next(ent); + } + add_compilation_unit(di, buffer_ext_size(dl)); + add_debug_line(dl, debug, nr_debug_entries, 0); + add_debug_abbrev(da); + if (0) buffer_ext_dump(da, "abbrev"); + + return 0; +} + +int +jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries) +{ + Elf_Data *d; + Elf_Scn *scn; + Elf_Shdr *shdr; + struct buffer_ext dl, di, da; + int ret; + + buffer_ext_init(&dl); + buffer_ext_init(&di); + buffer_ext_init(&da); + + ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di); + if (ret) + return -1; + /* + * setup .debug_line section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&dl); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&dl); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 52; /* .debug_line */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + 
shdr->sh_entsize = 0; + + /* + * setup .debug_info section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&di); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&di); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 64; /* .debug_info */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup .debug_abbrev section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&da); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&da); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 76; /* .debug_abbrev */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * now we update the ELF image with all the sections + */ + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update debug failed"); + return -1; + } + return 0; +} diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index f50b7235ecb6..73e38e472ecd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -23,6 +23,8 @@ #include "strbuf.h" #include "build-id.h" #include "data.h" +#include <api/fs/fs.h> +#include "asm/bug.h" /* * magic2 = "PERFILE2" @@ -868,6 +870,199 @@ static int write_auxtrace(int fd, struct perf_header *h, return err; } +static int cpu_cache_level__sort(const void
*a, const void *b) +{ + struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; + struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b; + + return cache_a->level - cache_b->level; +} + +static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b) +{ + if (a->level != b->level) + return false; + + if (a->line_size != b->line_size) + return false; + + if (a->sets != b->sets) + return false; + + if (a->ways != b->ways) + return false; + + if (strcmp(a->type, b->type)) + return false; + + if (strcmp(a->size, b->size)) + return false; + + if (strcmp(a->map, b->map)) + return false; + + return true; +} + +static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level) +{ + char path[PATH_MAX], file[PATH_MAX]; + struct stat st; + size_t len; + + scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level); + scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path); + + if (stat(file, &st)) + return 1; + + scnprintf(file, PATH_MAX, "%s/level", path); + if (sysfs__read_int(file, (int *) &cache->level)) + return -1; + + scnprintf(file, PATH_MAX, "%s/coherency_line_size", path); + if (sysfs__read_int(file, (int *) &cache->line_size)) + return -1; + + scnprintf(file, PATH_MAX, "%s/number_of_sets", path); + if (sysfs__read_int(file, (int *) &cache->sets)) + return -1; + + scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path); + if (sysfs__read_int(file, (int *) &cache->ways)) + return -1; + + scnprintf(file, PATH_MAX, "%s/type", path); + if (sysfs__read_str(file, &cache->type, &len)) + return -1; + + cache->type[len] = 0; + cache->type = rtrim(cache->type); + + scnprintf(file, PATH_MAX, "%s/size", path); + if (sysfs__read_str(file, &cache->size, &len)) { + free(cache->type); + return -1; + } + + cache->size[len] = 0; + cache->size = rtrim(cache->size); + + scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); + if (sysfs__read_str(file, &cache->map, &len)) { + free(cache->size); /* release the size string read just above */ +
free(cache->type); + return -1; + } + + cache->map[len] = 0; + cache->map = rtrim(cache->map); + return 0; +} + +static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c) +{ + fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map); +} + +static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) +{ + u32 i, cnt = 0; + long ncpus; + u32 nr, cpu; + u16 level; + + ncpus = sysconf(_SC_NPROCESSORS_CONF); + if (ncpus < 0) + return -1; + + nr = (u32)(ncpus & UINT_MAX); + + for (cpu = 0; cpu < nr; cpu++) { + for (level = 0; level < 10; level++) { + struct cpu_cache_level c; + int err; + + err = cpu_cache_level__read(&c, cpu, level); + if (err < 0) + return err; + + if (err == 1) + break; + + for (i = 0; i < cnt; i++) { + if (cpu_cache_level__cmp(&c, &caches[i])) + break; + } + + if (i == cnt) + caches[cnt++] = c; + else + cpu_cache_level__free(&c); + + if (WARN_ONCE(cnt == size, "way too many cpu caches..")) + goto out; + } + } + out: + *cntp = cnt; + return 0; +} + +#define MAX_CACHES 2000 + +static int write_cache(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + struct cpu_cache_level caches[MAX_CACHES]; + u32 cnt = 0, i, version = 1; + int ret; + + ret = build_caches(caches, MAX_CACHES, &cnt); + if (ret) + goto out; + + qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort); + + ret = do_write(fd, &version, sizeof(u32)); + if (ret < 0) + goto out; + + ret = do_write(fd, &cnt, sizeof(u32)); + if (ret < 0) + goto out; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level *c = &caches[i]; + + #define _W(v) \ + ret = do_write(fd, &c->v, sizeof(u32)); \ + if (ret < 0) \ + goto out; + + _W(level) + _W(line_size) + _W(sets) + _W(ways) + #undef _W + + #define _W(v) \ + ret = do_write_string(fd, (const char *) c->v); \ + if (ret < 0) \ + goto out; + + _W(type) + _W(size) + _W(map) + #undef _W + } + +out: + for (i = 0; i < cnt; i++) + 
cpu_cache_level__free(&caches[i]); + return ret; +} + static int write_stat(int fd __maybe_unused, struct perf_header *h __maybe_unused, struct perf_evlist *evlist __maybe_unused) @@ -1172,6 +1367,18 @@ static void print_stat(struct perf_header *ph __maybe_unused, fprintf(fp, "# contains stat data\n"); } +static void print_cache(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp __maybe_unused) +{ + int i; + + fprintf(fp, "# CPU cache info:\n"); + for (i = 0; i < ph->env.caches_cnt; i++) { + fprintf(fp, "# "); + cpu_cache_level__fprintf(fp, &ph->env.caches[i]); + } +} + static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { @@ -1920,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section, return err; } +static int process_cache(struct perf_file_section *section __maybe_unused, + struct perf_header *ph __maybe_unused, int fd __maybe_unused, + void *data __maybe_unused) +{ + struct cpu_cache_level *caches; + u32 cnt, i, version; + + if (readn(fd, &version, sizeof(version)) != sizeof(version)) + return -1; + + if (ph->needs_swap) + version = bswap_32(version); + + if (version != 1) + return -1; + + if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt)) + return -1; + + if (ph->needs_swap) + cnt = bswap_32(cnt); + + caches = zalloc(sizeof(*caches) * cnt); + if (!caches) + return -1; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level c; + + #define _R(v) \ + if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\ + goto out_free_caches; \ + if (ph->needs_swap) \ + c.v = bswap_32(c.v); \ + + _R(level) + _R(line_size) + _R(sets) + _R(ways) + #undef _R + + #define _R(v) \ + c.v = do_read_string(fd, ph); \ + if (!c.v) \ + goto out_free_caches; + + _R(type) + _R(size) + _R(map) + #undef _R + + caches[i] = c; + } + + ph->env.caches = caches; + ph->env.caches_cnt = cnt; + return 0; +out_free_caches: + free(caches); + return -1; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct 
perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1962,6 +2231,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_GROUP_DESC, group_desc), FEAT_OPP(HEADER_AUXTRACE, auxtrace), FEAT_OPA(HEADER_STAT, stat), + FEAT_OPF(HEADER_CACHE, cache), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index cff9892452ee..3d87ca823c0a 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -32,6 +32,7 @@ enum { HEADER_GROUP_DESC, HEADER_AUXTRACE, HEADER_STAT, + HEADER_CACHE, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 68a7612019dc..561e9473a915 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -432,8 +432,12 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { - if (sample_self) + if (sample_self) { he_stat__add_period(&he->stat, period, weight); + hists->stats.total_period += period; + if (!he->filtered) + hists->stats.total_non_filtered_period += period; + } if (symbol_conf.cumulate_callchain) he_stat__add_period(he->stat_acc, period, weight); @@ -466,7 +470,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!he) return NULL; - hists->nr_entries++; + if (sample_self) + hists__inc_stats(hists, he); + else + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); @@ -951,10 +958,11 @@ out: int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + hists__for_each_sort_list(hists, fmt) { cmp = fmt->cmp(fmt, left, right); if (cmp) break; @@ -966,10 +974,11 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) int64_t hist_entry__collapse(struct hist_entry *left, struct 
hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + hists__for_each_sort_list(hists, fmt) { cmp = fmt->collapse(fmt, left, right); if (cmp) break; @@ -1006,6 +1015,27 @@ void hist_entry__delete(struct hist_entry *he) } /* + * If this is not the last column, then we need to pad it according to the + * pre-calculated max lenght for this column, otherwise don't bother adding + * spaces because that would break viewing this with, for instance, 'less', + * that would show tons of trailing spaces when a long C++ demangled method + * names is sampled. +*/ +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed) +{ + if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) { + const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists)); + if (printed < width) { + advance_hpp(hpp, printed); + printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " "); + } + } + + return printed; +} + +/* * collapse the histogram */ @@ -1110,10 +1140,11 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) { + struct hists *hists = a->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { + hists__for_each_sort_list(hists, fmt) { if (perf_hpp__should_skip(fmt, a->hists)) continue; @@ -1163,9 +1194,18 @@ static void __hists__insert_output_entry(struct rb_root *entries, struct rb_node *parent = NULL; struct hist_entry *iter; - if (use_callchain) + if (use_callchain) { + if (callchain_param.mode == CHAIN_GRAPH_REL) { + u64 total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (callchain_param.min_percent / 100); + } callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); + } while 
(*p != NULL) { parent = *p; @@ -1181,21 +1221,15 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *hists, struct ui_progress *prog) +static void output_resort(struct hists *hists, struct ui_progress *prog, + bool use_callchain) { struct rb_root *root; struct rb_node *next; struct hist_entry *n; u64 min_callchain_hits; - struct perf_evsel *evsel = hists_to_evsel(hists); - bool use_callchain; - - if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) - use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; - else - use_callchain = symbol_conf.use_callchain; - min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = hists__total_period(hists) * (callchain_param.min_percent / 100); if (sort__need_collapse) root = &hists->entries_collapsed; @@ -1223,6 +1257,23 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) } } +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) +{ + bool use_callchain; + + if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; + + output_resort(evsel__hists(evsel), prog, use_callchain); +} + +void hists__output_resort(struct hists *hists, struct ui_progress *prog) +{ + output_resort(hists, prog, symbol_conf.use_callchain); +} + static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { @@ -1254,28 +1305,6 @@ static bool hists__filter_entry_by_dso(struct hists *hists, return false; } -void hists__filter_by_dso(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = 
rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (symbol_conf.exclude_other && !h->parent) - continue; - - if (hists__filter_entry_by_dso(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); - } -} - static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he) { @@ -1288,25 +1317,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists, return false; } -void hists__filter_by_thread(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (hists__filter_entry_by_thread(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); - } -} - static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he) { @@ -1320,25 +1330,6 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, return false; } -void hists__filter_by_symbol(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (hists__filter_entry_by_symbol(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL); - } -} - static bool hists__filter_entry_by_socket(struct hists *hists, struct hist_entry *he) { @@ -1351,7 +1342,9 @@ static bool hists__filter_entry_by_socket(struct hists *hists, return false; } -void hists__filter_by_socket(struct hists *hists) +typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); + +static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) { struct rb_node *nd; @@ -1363,13 
+1356,37 @@ void hists__filter_by_socket(struct hists *hists) for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (hists__filter_entry_by_socket(hists, h)) + if (filter(hists, h)) continue; - hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET); + hists__remove_entry_filter(hists, h, type); } } +void hists__filter_by_thread(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__THREAD, + hists__filter_entry_by_thread); +} + +void hists__filter_by_dso(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__DSO, + hists__filter_entry_by_dso); +} + +void hists__filter_by_symbol(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__SYMBOL, + hists__filter_entry_by_symbol); +} + +void hists__filter_by_socket(struct hists *hists) +{ + hists__filter_by_type(hists, HIST_FILTER__SOCKET, + hists__filter_entry_by_socket); +} + void events_stats__inc(struct events_stats *stats, u32 type) { ++stats->nr_events[0]; @@ -1585,7 +1602,7 @@ int perf_hist_config(const char *var, const char *value) return 0; } -int __hists__init(struct hists *hists) +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) { memset(hists, 0, sizeof(*hists)); hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; @@ -1594,6 +1611,7 @@ int __hists__init(struct hists *hists) hists->entries = RB_ROOT; pthread_mutex_init(&hists->lock, NULL); hists->socket_filter = -1; + hists->hpp_list = hpp_list; return 0; } @@ -1630,7 +1648,7 @@ static int hists_evsel__init(struct perf_evsel *evsel) { struct hists *hists = evsel__hists(evsel); - __hists__init(hists); + __hists__init(hists, &perf_hpp_list); return 0; } @@ -1649,3 +1667,9 @@ int hists__init(void) return err; } + +void perf_hpp_list__init(struct perf_hpp_list *list) +{ + INIT_LIST_HEAD(&list->fields); + INIT_LIST_HEAD(&list->sorts); +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 
d4ec4822a103..840b6d6aa44f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,6 +75,7 @@ struct hists { u64 event_stream; u16 col_len[HISTC_NR_COLS]; int socket_filter; + struct perf_hpp_list *hpp_list; }; struct hist_entry_iter; @@ -121,13 +122,19 @@ struct hist_entry *__hists__add_entry(struct hists *hists, int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); +struct perf_hpp; +struct perf_hpp_fmt; + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed); void hist_entry__delete(struct hist_entry *he); +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); @@ -185,7 +192,7 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel) } int hists__init(void); -int __hists__init(struct hists *hists); +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); struct rb_root *hists__get_rotate_entries_in(struct hists *hists); bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, @@ -214,28 +221,56 @@ struct perf_hpp_fmt { struct hist_entry *a, struct hist_entry *b); int64_t (*sort)(struct perf_hpp_fmt *fmt, struct hist_entry *a, struct hist_entry *b); + bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); + void (*free)(struct perf_hpp_fmt *fmt); struct list_head list; struct list_head sort_list; bool elide; int len; int user_len; + int idx; }; -extern struct list_head perf_hpp__list; -extern struct list_head 
/*
 * Iterate over the output fields / sort keys of the perf_hpp_list attached
 * to a struct hists.
 *
 * @hists:  pointer to the struct hists whose ->hpp_list is walked
 * @format: iterator variable supplied by the caller
 *
 * The iterator argument is forwarded as given; it must not be hard-coded,
 * otherwise the macros only compile when the caller's variable happens to
 * be called 'fmt'.
 */
#define hists__for_each_format(hists, format) \
	perf_hpp_list__for_each_format((hists)->hpp_list, format)

#define hists__for_each_sort_list(hists, format) \
	perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)
perf_hpp__column_unregister(struct perf_hpp_fmt *format); -void perf_hpp__column_enable(unsigned col); -void perf_hpp__column_disable(unsigned col); void perf_hpp__cancel_cumulate(void); +void perf_hpp__setup_output_field(struct perf_hpp_list *list); +void perf_hpp__reset_output_field(struct perf_hpp_list *list); +void perf_hpp__append_sort_keys(struct perf_hpp_list *list); -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); -void perf_hpp__setup_output_field(void); -void perf_hpp__reset_output_field(void); -void perf_hpp__append_sort_keys(void); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format); bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists); @@ -381,4 +411,6 @@ int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); int perf_hist_config(const char *var, const char *value); +void perf_hpp_list__init(struct perf_hpp_list *list); + #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h new file mode 100644 index 000000000000..a1e99da0715a --- /dev/null +++ b/tools/perf/util/jit.h @@ -0,0 +1,15 @@ +#ifndef __JIT_H__ +#define __JIT_H__ + +#include <data.h> + +extern int jit_process(struct perf_session *session, + struct perf_data_file *output, + struct machine *machine, + char *filename, + pid_t pid, + u64 *nbytes); + +extern int jit_inject_record(const char *filename); + +#endif /* __JIT_H__ */ diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c new file mode 100644 index 000000000000..99fa5eee9fe0 --- /dev/null +++ b/tools/perf/util/jitdump.c @@ -0,0 +1,672 @@ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> +#include <byteswap.h> +#include <sys/stat.h> +#include 
<sys/mman.h> + +#include "util.h" +#include "event.h" +#include "debug.h" +#include "evlist.h" +#include "symbol.h" +#include "strlist.h" +#include <elf.h> + +#include "session.h" +#include "jit.h" +#include "jitdump.h" +#include "genelf.h" +#include "../builtin.h" + +struct jit_buf_desc { + struct perf_data_file *output; + struct perf_session *session; + struct machine *machine; + union jr_entry *entry; + void *buf; + uint64_t sample_type; + size_t bufsize; + FILE *in; + bool needs_bswap; /* handles cross-endianess */ + void *debug_data; + size_t nr_debug_entries; + uint32_t code_load_count; + u64 bytes_written; + struct rb_root code_root; + char dir[PATH_MAX]; +}; + +struct debug_line_info { + unsigned long vma; + unsigned int lineno; + /* The filename format is unspecified, absolute path, relative etc. */ + char const filename[0]; +}; + +struct jit_tool { + struct perf_tool tool; + struct perf_data_file output; + struct perf_data_file input; + u64 bytes_written; +}; + +#define hmax(a, b) ((a) > (b) ? 
(a) : (b)) +#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool)) + +static int +jit_emit_elf(char *filename, + const char *sym, + uint64_t code_addr, + const void *code, + int csize, + void *debug, + int nr_debug_entries) +{ + int ret, fd; + + if (verbose > 0) + fprintf(stderr, "write ELF image %s\n", filename); + + fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644); + if (fd == -1) { + pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno)); + return -1; + } + + ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries); + + close(fd); + + if (ret) + unlink(filename); + + return ret; +} + +static void +jit_close(struct jit_buf_desc *jd) +{ + if (!(jd && jd->in)) + return; + funlockfile(jd->in); + fclose(jd->in); + jd->in = NULL; +} + +static int +jit_open(struct jit_buf_desc *jd, const char *name) +{ + struct jitheader header; + struct jr_prefix *prefix; + ssize_t bs, bsz = 0; + void *n, *buf = NULL; + int ret, retval = -1; + + jd->in = fopen(name, "r"); + if (!jd->in) + return -1; + + bsz = hmax(sizeof(header), sizeof(*prefix)); + + buf = malloc(bsz); + if (!buf) + goto error; + + /* + * protect from writer modifying the file while we are reading it + */ + flockfile(jd->in); + + ret = fread(buf, sizeof(header), 1, jd->in); + if (ret != 1) + goto error; + + memcpy(&header, buf, sizeof(header)); + + if (header.magic != JITHEADER_MAGIC) { + if (header.magic != JITHEADER_MAGIC_SW) + goto error; + jd->needs_bswap = true; + } + + if (jd->needs_bswap) { + header.version = bswap_32(header.version); + header.total_size = bswap_32(header.total_size); + header.pid = bswap_32(header.pid); + header.elf_mach = bswap_32(header.elf_mach); + header.timestamp = bswap_64(header.timestamp); + header.flags = bswap_64(header.flags); + } + + if (verbose > 2) + pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n", + header.version, + header.total_size, + (unsigned long long)header.timestamp, + header.pid, + 
header.elf_mach); + + if (header.flags & JITDUMP_FLAGS_RESERVED) { + pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", + (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED); + goto error; + } + + bs = header.total_size - sizeof(header); + + if (bs > bsz) { + n = realloc(buf, bs); + if (!n) + goto error; + bsz = bs; + buf = n; + /* read extra we do not know about */ + ret = fread(buf, bs - bsz, 1, jd->in); + if (ret != 1) + goto error; + } + /* + * keep dirname for generating files and mmap records + */ + strcpy(jd->dir, name); + dirname(jd->dir); + + return 0; +error: + funlockfile(jd->in); + fclose(jd->in); + return retval; +} + +static union jr_entry * +jit_get_next_entry(struct jit_buf_desc *jd) +{ + struct jr_prefix *prefix; + union jr_entry *jr; + void *addr; + size_t bs, size; + int id, ret; + + if (!(jd && jd->in)) + return NULL; + + if (jd->buf == NULL) { + size_t sz = getpagesize(); + if (sz < sizeof(*prefix)) + sz = sizeof(*prefix); + + jd->buf = malloc(sz); + if (jd->buf == NULL) + return NULL; + + jd->bufsize = sz; + } + + prefix = jd->buf; + + /* + * file is still locked at this point + */ + ret = fread(prefix, sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + if (jd->needs_bswap) { + prefix->id = bswap_32(prefix->id); + prefix->total_size = bswap_32(prefix->total_size); + prefix->timestamp = bswap_64(prefix->timestamp); + } + id = prefix->id; + size = prefix->total_size; + + bs = (size_t)size; + if (bs < sizeof(*prefix)) + return NULL; + + if (id >= JIT_CODE_MAX) { + pr_warning("next_entry: unknown prefix %d, skipping\n", id); + return NULL; + } + if (bs > jd->bufsize) { + void *n; + n = realloc(jd->buf, bs); + if (!n) + return NULL; + jd->buf = n; + jd->bufsize = bs; + } + + addr = ((void *)jd->buf) + sizeof(*prefix); + + ret = fread(addr, bs - sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + jr = (union jr_entry *)jd->buf; + + switch(id) { + case JIT_CODE_DEBUG_INFO: + if (jd->needs_bswap) { + 
uint64_t n; + jr->info.code_addr = bswap_64(jr->info.code_addr); + jr->info.nr_entry = bswap_64(jr->info.nr_entry); + for (n = 0 ; n < jr->info.nr_entry; n++) { + jr->info.entries[n].addr = bswap_64(jr->info.entries[n].addr); + jr->info.entries[n].lineno = bswap_32(jr->info.entries[n].lineno); + jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim); + } + } + break; + case JIT_CODE_CLOSE: + break; + case JIT_CODE_LOAD: + if (jd->needs_bswap) { + jr->load.pid = bswap_32(jr->load.pid); + jr->load.tid = bswap_32(jr->load.tid); + jr->load.vma = bswap_64(jr->load.vma); + jr->load.code_addr = bswap_64(jr->load.code_addr); + jr->load.code_size = bswap_64(jr->load.code_size); + jr->load.code_index= bswap_64(jr->load.code_index); + } + jd->code_load_count++; + break; + case JIT_CODE_MOVE: + if (jd->needs_bswap) { + jr->move.pid = bswap_32(jr->move.pid); + jr->move.tid = bswap_32(jr->move.tid); + jr->move.vma = bswap_64(jr->move.vma); + jr->move.old_code_addr = bswap_64(jr->move.old_code_addr); + jr->move.new_code_addr = bswap_64(jr->move.new_code_addr); + jr->move.code_size = bswap_64(jr->move.code_size); + jr->move.code_index = bswap_64(jr->move.code_index); + } + break; + case JIT_CODE_MAX: + default: + return NULL; + } + return jr; +} + +static int +jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) +{ + ssize_t size; + + size = perf_data_file__write(jd->output, event, event->header.size); + if (size < 0) + return -1; + + jd->bytes_written += size; + return 0; +} + +static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + uint64_t code, addr; + uintptr_t uaddr; + char *filename; + struct stat st; + size_t size; + u16 idr_size; + const char *sym; + uint32_t count; + int ret, csize; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->load.pid; + tid = jr->load.tid; + csize = 
jr->load.code_size; + addr = jr->load.code_addr; + sym = (void *)((unsigned long)jr + sizeof(jr->load)); + code = (unsigned long)jr + jr->load.p.total_size - csize; + count = jr->load.code_index; + idr_size = jd->machine->id_hdr_size; + + event = calloc(1, sizeof(*event) + idr_size); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so", + jd->dir, + pid, + count); + + size++; /* for \0 */ + + size = PERF_ALIGN(size, sizeof(u64)); + uaddr = (uintptr_t)code; + ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries); + + if (jd->debug_data && jd->nr_debug_entries) { + free(jd->debug_data); + jd->debug_data = NULL; + jd->nr_debug_entries = 0; + } + + if (ret) { + free(event); + return -1; + } + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + + event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = addr; + event->mmap2.len = csize; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, sizeof(sample)); + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if 
(ret) + return ret; + + ret = jit_inject_event(jd, event); + /* + * mark dso as use to generate buildid in the header + */ + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + char *filename; + size_t size; + struct stat st; + u16 idr_size; + int ret; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->move.pid; + tid = jr->move.tid; + idr_size = jd->machine->id_hdr_size; + + /* + * +16 to account for sample_id_all (hack) + */ + event = calloc(1, sizeof(*event) + 16); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64, + jd->dir, + pid, + jr->move.code_index); + + size++; /* for \0 */ + + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + size = PERF_ALIGN(size, sizeof(u64)); + + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = jr->move.new_code_addr; + event->mmap2.len = jr->move.code_size; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, 
sizeof(sample)); + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = jr->move.new_code_addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if (ret) + return ret; + + ret = jit_inject_event(jd, event); + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr) +{ + void *data; + size_t sz; + + if (!(jd && jr)) + return -1; + + sz = jr->prefix.total_size - sizeof(jr->info); + data = malloc(sz); + if (!data) + return -1; + + memcpy(data, &jr->info.entries, sz); + + jd->debug_data = data; + + /* + * we must use nr_entry instead of size here because + * we cannot distinguish actual entry from padding otherwise + */ + jd->nr_debug_entries = jr->info.nr_entry; + + return 0; +} + +static int +jit_process_dump(struct jit_buf_desc *jd) +{ + union jr_entry *jr; + int ret; + + while ((jr = jit_get_next_entry(jd))) { + switch(jr->prefix.id) { + case JIT_CODE_LOAD: + ret = jit_repipe_code_load(jd, jr); + break; + case JIT_CODE_MOVE: + ret = jit_repipe_code_move(jd, jr); + break; + case JIT_CODE_DEBUG_INFO: + ret = jit_repipe_debug_info(jd, jr); + break; + default: + ret = 0; + continue; + } + } + return ret; +} + +static int +jit_inject(struct jit_buf_desc *jd, char *path) +{ + int ret; + + if (verbose > 0) + fprintf(stderr, "injecting: %s\n", path); + + ret = jit_open(jd, path); + if (ret) + return -1; + + ret = jit_process_dump(jd); + + jit_close(jd); + + if (verbose > 0) + fprintf(stderr, "injected: %s (%d)\n", path, ret); + + return 0; +} + +/* + * File must be with pattern .../jit-XXXX.dump + * where XXXX is the PID of the process which did the mmap() + * as captured in the RECORD_MMAP record + */ +static int +jit_detect(char *mmap_name, pid_t pid) + { + char *p; + char *end = NULL; + pid_t pid2; + + if (verbose > 2) + fprintf(stderr, "jit marker trying : %s\n", mmap_name); + /* + * get file name + */ 
+ p = strrchr(mmap_name, '/'); + if (!p) + return -1; + + /* + * match prefix + */ + if (strncmp(p, "/jit-", 5)) + return -1; + + /* + * skip prefix + */ + p += 5; + + /* + * must be followed by a pid + */ + if (!isdigit(*p)) + return -1; + + pid2 = (int)strtol(p, &end, 10); + if (!end) + return -1; + + /* + * pid does not match mmap pid + * pid==0 in system-wide mode (synthesized) + */ + if (pid && pid2 != pid) + return -1; + /* + * validate suffix + */ + if (strcmp(end, ".dump")) + return -1; + + if (verbose > 0) + fprintf(stderr, "jit marker found: %s\n", mmap_name); + + return 0; +} + +int +jit_process(struct perf_session *session, + struct perf_data_file *output, + struct machine *machine, + char *filename, + pid_t pid, + u64 *nbytes) +{ + struct perf_evsel *first; + struct jit_buf_desc jd; + int ret; + + /* + * first, detect marker mmap (i.e., the jitdump mmap) + */ + if (jit_detect(filename, pid)) + return -1; + + memset(&jd, 0, sizeof(jd)); + + jd.session = session; + jd.output = output; + jd.machine = machine; + + /* + * track sample_type to compute id_all layout + * perf sets the same sample type to all events as of now + */ + first = perf_evlist__first(session->evlist); + jd.sample_type = first->attr.sample_type; + + *nbytes = 0; + + ret = jit_inject(&jd, filename); + if (!ret) + *nbytes = jd.bytes_written; + + return ret; +} diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h new file mode 100644 index 000000000000..b66c1f503d9e --- /dev/null +++ b/tools/perf/util/jitdump.h @@ -0,0 +1,124 @@ +/* + * jitdump.h: jitted code info encapsulation file format + * + * Adapted from OProfile GPLv2 support jidump.h: + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#ifndef JITDUMP_H +#define JITDUMP_H + +#include <sys/time.h> +#include <time.h> +#include <stdint.h> + +/* JiTD */ +#define JITHEADER_MAGIC 0x4A695444 +#define JITHEADER_MAGIC_SW 0x4454694A + +#define PADDING_8ALIGNED(x) ((((x) 
+ 7) & 7) ^ 7) + +#define JITHEADER_VERSION 1 + +enum jitdump_flags_bits { + JITDUMP_FLAGS_MAX_BIT, +}; + +#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \ + (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0) + +struct jitheader { + uint32_t magic; /* characters "jItD" */ + uint32_t version; /* header version */ + uint32_t total_size; /* total size of header */ + uint32_t elf_mach; /* elf mach target */ + uint32_t pad1; /* reserved */ + uint32_t pid; /* JIT process id */ + uint64_t timestamp; /* timestamp */ + uint64_t flags; /* flags */ +}; + +enum jit_record_type { + JIT_CODE_LOAD = 0, + JIT_CODE_MOVE = 1, + JIT_CODE_DEBUG_INFO = 2, + JIT_CODE_CLOSE = 3, + + JIT_CODE_MAX, +}; + +/* record prefix (mandatory in each record) */ +struct jr_prefix { + uint32_t id; + uint32_t total_size; + uint64_t timestamp; +}; + +struct jr_code_load { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct jr_code_close { + struct jr_prefix p; +}; + +struct jr_code_move { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t old_code_addr; + uint64_t new_code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct debug_entry { + uint64_t addr; + int lineno; /* source line number starting at 1 */ + int discrim; /* column discriminator, 0 is default */ + const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */ +}; + +struct jr_code_debug_info { + struct jr_prefix p; + + uint64_t code_addr; + uint64_t nr_entry; + struct debug_entry entries[0]; +}; + +union jr_entry { + struct jr_code_debug_info info; + struct jr_code_close close; + struct jr_code_load load; + struct jr_code_move move; + struct jr_prefix prefix; +}; + +static inline struct debug_entry * +debug_entry_next(struct debug_entry *ent) +{ + void *a = ent + 1; + size_t l = strlen(ent->name) + 1; + return a + l; +} + +static inline char * 
+debug_entry_file(struct debug_entry *ent) +{ + void *a = ent + 1; + return a; +} + +#endif /* !JITDUMP_H */ diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index ae825d4ec110..d01e73592f6e 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -122,6 +122,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, bool kvm_exit_event(struct perf_evsel *evsel); bool kvm_entry_event(struct perf_evsel *evsel); +int setup_kvm_events_tp(struct perf_kvm_stat *kvm); #define define_exit_reasons_table(name, symbols) \ static struct exit_reasons_table name[] = { \ @@ -133,8 +134,13 @@ bool kvm_entry_event(struct perf_evsel *evsel); */ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid); -extern const char * const kvm_events_tp[]; +extern const char *kvm_events_tp[]; extern struct kvm_reg_events_ops kvm_reg_events_ops[]; extern const char * const kvm_skip_events[]; +extern const char *vcpu_id_str; +extern const int decode_str_len; +extern const char *kvm_exit_reason; +extern const char *kvm_entry_trace; +extern const char *kvm_exit_trace; #endif /* __PERF_KVM_STAT_H */ diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 2c2b443df5ba..1a3e45baf97f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -180,6 +180,16 @@ struct symbol *machine__find_kernel_symbol(struct machine *machine, } static inline +struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, + enum map_type type, const char *name, + struct map **mapp, + symbol_filter_t filter) +{ + return map_groups__find_symbol_by_name(&machine->kmaps, type, name, + mapp, filter); +} + +static inline struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr, struct map **mapp, symbol_filter_t filter) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 813d9b272c81..e5583fd4e7bd 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ 
-1386,8 +1386,7 @@ int parse_events_terms(struct list_head *terms, const char *str) return 0; } - if (data.terms) - parse_events__free_terms(data.terms); + parse_events_terms__delete(data.terms); return ret; } @@ -2068,12 +2067,22 @@ int parse_events_term__clone(struct parse_events_term **new, term->err_term, term->err_val); } -void parse_events__free_terms(struct list_head *terms) +void parse_events_terms__purge(struct list_head *terms) { struct parse_events_term *term, *h; - list_for_each_entry_safe(term, h, terms, list) + list_for_each_entry_safe(term, h, terms, list) { + list_del_init(&term->list); free(term); + } +} + +void parse_events_terms__delete(struct list_head *terms) +{ + if (!terms) + return; + parse_events_terms__purge(terms); + free(terms); } void parse_events_evlist_error(struct parse_events_evlist *data, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f1a6db107241..53628bf3da67 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -115,7 +115,8 @@ int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx); int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); -void parse_events__free_terms(struct list_head *terms); +void parse_events_terms__delete(struct list_head *terms); +void parse_events_terms__purge(struct list_head *terms); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index ad379968d4c1..c0eac88ef474 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -218,7 +218,7 @@ PE_NAME '/' event_config '/' ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, $1, $3)); - parse_events__free_terms($3); + parse_events_terms__delete($3); $$ = 
list; } | @@ -246,7 +246,7 @@ PE_KERNEL_PMU_EVENT sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } | @@ -266,7 +266,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } @@ -285,7 +285,7 @@ value_sym '/' event_config '/' ALLOC_LIST(list); ABORT_ON(parse_events_add_numeric(data, list, type, config, $3)); - parse_events__free_terms($3); + parse_events_terms__delete($3); $$ = list; } | diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b597bcc8fc78..cf59fbaee491 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -153,7 +153,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n if (fd == -1) return -1; - sret = read(fd, alias->unit, UNIT_MAX_LEN); + sret = read(fd, alias->unit, UNIT_MAX_LEN); if (sret < 0) goto error; @@ -354,7 +354,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, list_for_each_entry(term, &alias->terms, list) { ret = parse_events_term__clone(&cloned, term); if (ret) { - parse_events__free_terms(&list); + parse_events_terms__purge(&list); return ret; } list_add_tail(&cloned->list, &list); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index ec722346e6ff..de715756f281 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -25,9 +25,18 @@ int sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; +int sort__has_thread = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; - +/* + * Replaces all occurrences of a char used with the: + * + * -t, --field-separator + * + * option, that uses a special separator character and don't pad with spaces, + * replacing all occurances of this separator in symbol names (and other + * output) with a '.' 
character, that thus it's the only non valid separator. +*/ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) { int n; @@ -246,10 +255,8 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name); ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", ip - map->unmap_ip(map, sym->start)); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } else { - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + ret += repsep_snprintf(bf + ret, size - ret, "%.*s", width - ret, sym->name); } @@ -257,14 +264,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, size_t len = BITS_PER_LONG / 4; ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, ip); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } - if (ret > width) - bf[width] = '\0'; - - return width; + return ret; } static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, @@ -810,7 +812,7 @@ static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf, else out = "No"; - return repsep_snprintf(bf, size, "%-*s", width, out); + return repsep_snprintf(bf, size, "%.*s", width, out); } static int64_t @@ -1440,20 +1442,6 @@ struct hpp_sort_entry { struct sort_entry *se; }; -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) -{ - struct hpp_sort_entry *hse_a; - struct hpp_sort_entry *hse_b; - - if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) - return false; - - hse_a = container_of(a, struct hpp_sort_entry, hpp); - hse_b = container_of(b, struct hpp_sort_entry, hpp); - - return hse_a->se == hse_b->se; -} - void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) { struct hpp_sort_entry *hse; @@ -1539,6 +1527,33 @@ static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, return sort_fn(a, b); } +bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) +{ + 
return format->header == __sort__hpp_header; +} + +static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct hpp_sort_entry *hse_a; + struct hpp_sort_entry *hse_b; + + if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) + return false; + + hse_a = container_of(a, struct hpp_sort_entry, hpp); + hse_b = container_of(b, struct hpp_sort_entry, hpp); + + return hse_a->se == hse_b->se; +} + +static void hse_free(struct perf_hpp_fmt *fmt) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + free(hse); +} + static struct hpp_sort_entry * __sort_dimension__alloc_hpp(struct sort_dimension *sd) { @@ -1560,6 +1575,8 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.cmp = __sort__hpp_cmp; hse->hpp.collapse = __sort__hpp_collapse; hse->hpp.sort = __sort__hpp_sort; + hse->hpp.equal = __sort__hpp_equal; + hse->hpp.free = hse_free; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); @@ -1570,9 +1587,23 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) return hse; } -bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) +static void hpp_free(struct perf_hpp_fmt *fmt) { - return format->header == __sort__hpp_header; + free(fmt); +} + +static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd) +{ + struct perf_hpp_fmt *fmt; + + fmt = memdup(hd->fmt, sizeof(*fmt)); + if (fmt) { + INIT_LIST_HEAD(&fmt->list); + INIT_LIST_HEAD(&fmt->sort_list); + fmt->free = hpp_free; + } + + return fmt; } static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) @@ -1586,14 +1617,15 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) return 0; } -static int __sort_dimension__add_hpp_output(struct sort_dimension *sd) +static int __sort_dimension__add_hpp_output(struct perf_hpp_list *list, + struct sort_dimension *sd) { struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); if (hse == NULL) return -1; - 
perf_hpp__column_register(&hse->hpp); + perf_hpp_list__column_register(list, &hse->hpp); return 0; } @@ -1803,6 +1835,14 @@ bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt) return fmt->cmp == __sort__hde_cmp; } +static void hde_free(struct perf_hpp_fmt *fmt) +{ + struct hpp_dynamic_entry *hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + free(hde); +} + static struct hpp_dynamic_entry * __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) { @@ -1827,6 +1867,7 @@ __alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field) hde->hpp.cmp = __sort__hde_cmp; hde->hpp.collapse = __sort__hde_cmp; hde->hpp.sort = __sort__hde_cmp; + hde->hpp.free = hde_free; INIT_LIST_HEAD(&hde->hpp.list); INIT_LIST_HEAD(&hde->hpp.sort_list); @@ -2064,40 +2105,54 @@ static int __sort_dimension__add(struct sort_dimension *sd) static int __hpp_dimension__add(struct hpp_dimension *hd) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__register_sort_field(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp__register_sort_field(fmt); return 0; } -static int __sort_dimension__add_output(struct sort_dimension *sd) +static int __sort_dimension__add_output(struct perf_hpp_list *list, + struct sort_dimension *sd) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd) < 0) + if (__sort_dimension__add_hpp_output(list, sd) < 0) return -1; sd->taken = 1; return 0; } -static int __hpp_dimension__add_output(struct hpp_dimension *hd) +static int __hpp_dimension__add_output(struct perf_hpp_list *list, + struct hpp_dimension *hd) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__column_register(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp_list__column_register(list, fmt); return 0; } int 
hpp_dimension__add_output(unsigned col) { BUG_ON(col >= PERF_HPP__MAX_INDEX); - return __hpp_dimension__add_output(&hpp_sort_dimensions[col]); + return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } static int sort_dimension__add(const char *tok, @@ -2136,6 +2191,8 @@ static int sort_dimension__add(const char *tok, sort__has_dso = 1; } else if (sd->entry == &sort_socket) { sort__has_socket = 1; + } else if (sd->entry == &sort_thread) { + sort__has_thread = 1; } return __sort_dimension__add(sd); @@ -2188,6 +2245,26 @@ static int sort_dimension__add(const char *tok, return -ESRCH; } +static int setup_sort_list(char *str, struct perf_evlist *evlist) +{ + char *tmp, *tok; + int ret = 0; + + for (tok = strtok_r(str, ", ", &tmp); + tok; tok = strtok_r(NULL, ", ", &tmp)) { + ret = sort_dimension__add(tok, evlist); + if (ret == -EINVAL) { + error("Invalid --sort key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --sort key: `%s'", tok); + break; + } + } + + return ret; +} + static const char *get_default_sort_order(struct perf_evlist *evlist) { const char *default_sort_orders[] = { @@ -2282,7 +2359,7 @@ static char *setup_overhead(char *keys) static int __setup_sorting(struct perf_evlist *evlist) { - char *tmp, *tok, *str; + char *str; const char *sort_keys; int ret = 0; @@ -2320,17 +2397,7 @@ static int __setup_sorting(struct perf_evlist *evlist) } } - for (tok = strtok_r(str, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = sort_dimension__add(tok, evlist); - if (ret == -EINVAL) { - error("Invalid --sort key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --sort key: `%s'", tok); - break; - } - } + ret = setup_sort_list(str, evlist); free(str); return ret; @@ -2341,7 +2408,7 @@ void perf_hpp__set_elide(int idx, bool elide) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if 
(!perf_hpp__is_sort_entry(fmt)) continue; @@ -2401,7 +2468,7 @@ void sort__setup_elide(FILE *output) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2413,7 +2480,7 @@ void sort__setup_elide(FILE *output) * It makes no sense to elide all of sort entries. * Just revert them to show up again. */ - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2421,7 +2488,7 @@ void sort__setup_elide(FILE *output) return; } - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -2429,7 +2496,7 @@ void sort__setup_elide(FILE *output) } } -static int output_field_add(char *tok) +static int output_field_add(struct perf_hpp_list *list, char *tok) { unsigned int i; @@ -2439,7 +2506,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -2448,7 +2515,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add_output(hd); + return __hpp_dimension__add_output(list, hd); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -2457,7 +2524,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -2466,12 +2533,32 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } return -ESRCH; 
} +static int setup_output_list(struct perf_hpp_list *list, char *str) +{ + char *tmp, *tok; + int ret = 0; + + for (tok = strtok_r(str, ", ", &tmp); + tok; tok = strtok_r(NULL, ", ", &tmp)) { + ret = output_field_add(list, tok); + if (ret == -EINVAL) { + error("Invalid --fields key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --fields key: `%s'", tok); + break; + } + } + + return ret; +} + static void reset_dimensions(void) { unsigned int i; @@ -2496,7 +2583,7 @@ bool is_strict_order(const char *order) static int __setup_output_field(void) { - char *tmp, *tok, *str, *strp; + char *str, *strp; int ret = -EINVAL; if (field_order == NULL) @@ -2516,17 +2603,7 @@ static int __setup_output_field(void) goto out; } - for (tok = strtok_r(strp, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(tok); - if (ret == -EINVAL) { - error("Invalid --fields key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --fields key: `%s'", tok); - break; - } - } + ret = setup_output_list(&perf_hpp_list, strp); out: free(str); @@ -2560,9 +2637,9 @@ int setup_sorting(struct perf_evlist *evlist) return err; /* copy sort keys to output fields */ - perf_hpp__setup_output_field(); + perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ - perf_hpp__append_sort_keys(); + perf_hpp__append_sort_keys(&perf_hpp_list); return 0; } @@ -2578,5 +2655,5 @@ void reset_output_field(void) sort_order = NULL; reset_dimensions(); - perf_hpp__reset_output_field(); + perf_hpp__reset_output_field(&perf_hpp_list); } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 687bbb124428..89a1273fd2da 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -32,9 +32,11 @@ extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; extern int sort__need_collapse; +extern int sort__has_dso; extern int sort__has_parent; extern int 
sort__has_sym; extern int sort__has_socket; +extern int sort__has_thread; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 6ac03146889d..4d8f18581b9b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -137,9 +137,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(FILE *out, int cpu, +static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -152,14 +153,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " frontend cycles idle "); + if (ratio) + out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + ratio); + else + out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(FILE *out, int cpu, +static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -172,14 +176,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " backend cycles idle "); + out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(FILE *out, int cpu, +static void print_branch_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { 
double total, ratio = 0.0; const char *color; @@ -192,14 +195,13 @@ static void print_branch_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all branches "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(FILE *out, int cpu, +static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -212,14 +214,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-dcache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(FILE *out, int cpu, +static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -231,15 +232,13 @@ static void print_l1_icache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-icache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(FILE *out, int cpu, +static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -251,15 +250,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, 
color, "%6.2f%%", ratio); - fprintf(out, " of all dTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(FILE *out, int cpu, +static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -271,15 +268,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all iTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(FILE *out, int cpu, +static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -291,15 +286,15 @@ static void print_ll_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all LL-cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr) +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out) { + void *ctxp = out->ctx; + print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; int ctx = evsel_context(evsel); @@ -307,119 +302,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %5.2f insns per cycle ", ratio); + 
print_metric(ctxp, NULL, "%7.2f ", + "insn per cycle", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + out->new_line(ctxp); if (total && avg) { ratio = total / avg; - fprintf(out, "\n"); - if (aggr == AGGR_NONE) - fprintf(out, " "); - fprintf(out, " # %5.2f stalled cycles per insn", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "stalled cycles per insn", + ratio); + } else { + print_metric(ctxp, NULL, NULL, + "stalled cycles per insn", 0); } - - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && - runtime_branches_stats[ctx][cpu].n != 0) { - print_branch_misses(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + if (runtime_branches_stats[ctx][cpu].n != 0) + print_branch_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_dcache_stats[ctx][cpu].n != 0) { - print_l1_dcache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_dcache_stats[ctx][cpu].n != 0) + print_l1_dcache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[ctx][cpu].n != 0) { - print_l1_icache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_icache_stats[ctx][cpu].n != 0) + print_l1_icache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all 
L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[ctx][cpu].n != 0) { - print_dtlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[ctx][cpu].n != 0) { - print_itlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_itlb_cache_stats[ctx][cpu].n != 0) + print_itlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[ctx][cpu].n != 0) { - print_ll_cache_misses(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && - runtime_cacherefs_stats[ctx][cpu].n != 0) { + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_ll_cache_stats[ctx][cpu].n != 0) + print_ll_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); if (total) ratio = avg * 100 / total; - fprintf(out, " # %8.3f %% of all cache refs ", ratio); - + if (runtime_cacherefs_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.3f %%", + "of all cache refs", ratio); + else + 
print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(out, cpu, evsel, avg); + print_stalled_cycles_frontend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(out, cpu, evsel, avg); + print_stalled_cycles_backend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %8.3f GHz ", ratio); + print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) - fprintf(out, - " # %5.2f%% transactional cycles ", - 100.0 * (avg / total)); + print_metric(ctxp, NULL, + "%7.2f%%", "transactional cycles", + 100.0 * (avg / total)); + else + print_metric(ctxp, NULL, NULL, "transactional cycles", + 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (total2 < avg) total2 = avg; if (total) - fprintf(out, - " # %5.2f%% aborted cycles ", + print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); - } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + else + print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / transaction ", ratio); - } else if (perf_stat_evsel__is(evsel, ELISION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + 
print_metric(ctxp, NULL, "%8.0f", + "cycles / transaction", ratio); + else + print_metric(ctxp, NULL, NULL, "cycles / transaction", + 0); + } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / elision ", ratio); + print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) - fprintf(out, " # %8.3f CPUs utilized ", avg / ratio); + print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", + avg / ratio); else - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; + char unit_buf[10]; total = avg_stats(&runtime_nsecs_stats[cpu]); @@ -429,9 +450,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, ratio *= 1000; unit = 'K'; } - - fprintf(out, " # %8.3f %c/sec ", ratio, unit); + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, NULL, 0); } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index afb0c45eba34..4d9b481cf3b6 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -97,7 +97,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; struct perf_stat_evsel *ps = evsel->priv; @@ -108,7 +108,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) perf_stat_evsel_id_init(evsel); } -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->priv == NULL) @@ -117,13 +117,13 @@ int 
perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) return 0; } -void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { zfree(&evsel->priv); } -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads) +static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) { struct perf_counts *counts; @@ -134,13 +134,13 @@ int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, return counts ? 0 : -ENOMEM; } -void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) { perf_counts__delete(evsel->prev_raw_counts); evsel->prev_raw_counts = NULL; } -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) { int ncpus = perf_evsel__nr_cpus(evsel); int nthreads = thread_map__nr(evsel->threads); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 086f4e128d63..f02af68adc04 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -68,21 +68,22 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel); extern struct stats walltime_nsecs_stats; +typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, + const char *fmt, double val); +typedef void (*new_line_t )(void *ctx); + void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu); -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr); - -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); -void perf_evsel__free_stat_priv(struct perf_evsel *evsel); - -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads); -void 
perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); +struct perf_stat_output_ctx { + void *ctx; + print_metric_t print_metric; + new_line_t new_line; +}; -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 562b8ebeae5b..b1dd68f358fc 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -6,6 +6,7 @@ #include <inttypes.h> #include "symbol.h" +#include "demangle-java.h" #include "machine.h" #include "vdso.h" #include <symbol/kallsyms.h> @@ -1077,6 +1078,8 @@ new_symbol: demangle_flags = DMGL_PARAMS | DMGL_ANSI; demangled = bfd_demangle(NULL, elf_name, demangle_flags); + if (demangled == NULL) + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); if (demangled != NULL) elf_name = demangled; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index ab02209a7cf3..e7588dc91518 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1466,7 +1466,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) * Read the build id if possible. 
This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) + if (is_regular_file(name) && + filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); /* @@ -1487,6 +1488,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) root_dir, name, PATH_MAX)) continue; + if (!is_regular_file(name)) + continue; + /* Name is now the name of the next image to try */ if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; @@ -1525,6 +1529,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; + if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (dso__build_id_is_kmod(dso, name, PATH_MAX)) + kmod = true; + if (syms_ss) ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod); else diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 802bb868d446..8ae051e0ec79 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -10,6 +10,7 @@ #include <linux/err.h> #include <traceevent/event-parse.h> #include <api/fs/tracing_path.h> +#include <api/fs/fs.h> #include "trace-event.h" #include "machine.h" #include "util.h" diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index ead9509835d2..35b20dd454de 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -507,54 +507,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) return ret; } -int filename__read_str(const char *filename, char **buf, size_t *sizep) -{ - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; - - fd = open(filename, O_RDONLY); - if (fd < 0) - return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - 
- n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warning("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; - } else - free(bf); - - close(fd); - return err; -} - const char *get_filename_for_perf_kvm(void) { const char *filename; @@ -691,3 +643,30 @@ out: return tip; } + +bool is_regular_file(const char *file) +{ + struct stat st; + + if (stat(file, &st)) + return false; + + return S_ISREG(st.st_mode); +} + +int fetch_current_timestamp(char *buf, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index fe915e616f9b..3dd04089e8be 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -303,7 +303,6 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool unwind_inlines); void free_srcline(char *srcline); -int filename__read_str(const char *filename, char **buf, size_t *sizep); int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); @@ -343,5 +342,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) const char *perf_tip(const char *dirpath); +bool is_regular_file(const char *file); +int fetch_current_timestamp(char *buf, size_t sz); #endif /* GIT_COMPAT_UTIL_H */ |