diff options
-rw-r--r-- | arch/x86/kernel/cpu/intel_pt.h | 33 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_cqm.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_pt.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_msr.c | 168 | ||||
-rw-r--r-- | kernel/events/core.c | 87 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 15 |
7 files changed, 189 insertions, 155 deletions
diff --git a/arch/x86/kernel/cpu/intel_pt.h b/arch/x86/kernel/cpu/intel_pt.h index feb293e96531..336878a5d205 100644 --- a/arch/x86/kernel/cpu/intel_pt.h +++ b/arch/x86/kernel/cpu/intel_pt.h @@ -25,32 +25,11 @@ */ #define TOPA_PMI_MARGIN 512 -/* - * Table of Physical Addresses bits - */ -enum topa_sz { - TOPA_4K = 0, - TOPA_8K, - TOPA_16K, - TOPA_32K, - TOPA_64K, - TOPA_128K, - TOPA_256K, - TOPA_512K, - TOPA_1MB, - TOPA_2MB, - TOPA_4MB, - TOPA_8MB, - TOPA_16MB, - TOPA_32MB, - TOPA_64MB, - TOPA_128MB, - TOPA_SZ_END, -}; +#define TOPA_SHIFT 12 -static inline unsigned int sizes(enum topa_sz tsz) +static inline unsigned int sizes(unsigned int tsz) { - return 1 << (tsz + 12); + return 1 << (tsz + TOPA_SHIFT); }; struct topa_entry { @@ -66,8 +45,8 @@ struct topa_entry { u64 rsvd4 : 16; }; -#define TOPA_SHIFT 12 -#define PT_CPUID_LEAVES 2 +#define PT_CPUID_LEAVES 2 +#define PT_CPUID_REGS_NUM 4 /* number of registers (eax, ebx, ecx, edx) */ enum pt_capabilities { PT_CAP_max_subleaf = 0, @@ -85,7 +64,7 @@ enum pt_capabilities { struct pt_pmu { struct pmu pmu; - u32 caps[4 * PT_CPUID_LEAVES]; + u32 caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES]; }; /** diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index a478e3c4cc3f..3f124d553c5a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2758,7 +2758,7 @@ static int intel_pmu_cpu_prepare(int cpu) if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) { cpuc->shared_regs = allocate_shared_regs(cpu); if (!cpuc->shared_regs) - return NOTIFY_BAD; + goto err; } if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { @@ -2766,18 +2766,27 @@ static int intel_pmu_cpu_prepare(int cpu) cpuc->constraint_list = kzalloc(sz, GFP_KERNEL); if (!cpuc->constraint_list) - return NOTIFY_BAD; + goto err_shared_regs; cpuc->excl_cntrs = allocate_excl_cntrs(cpu); - if (!cpuc->excl_cntrs) { - kfree(cpuc->constraint_list); - kfree(cpuc->shared_regs); - return NOTIFY_BAD; - } + if
(!cpuc->excl_cntrs) + goto err_constraint_list; + cpuc->excl_thread_id = 0; } return NOTIFY_OK; + +err_constraint_list: + kfree(cpuc->constraint_list); + cpuc->constraint_list = NULL; + +err_shared_regs: + kfree(cpuc->shared_regs); + cpuc->shared_regs = NULL; + +err: + return NOTIFY_BAD; } static void intel_pmu_cpu_starting(int cpu) diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c index 63eb68b73589..377e8f8ed391 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c +++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c @@ -1255,7 +1255,7 @@ static inline void cqm_pick_event_reader(int cpu) cpumask_set_cpu(cpu, &cqm_cpumask); } -static void intel_cqm_cpu_prepare(unsigned int cpu) +static void intel_cqm_cpu_starting(unsigned int cpu) { struct intel_pqr_state *state = &per_cpu(pqr_state, cpu); struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -1296,13 +1296,11 @@ static int intel_cqm_cpu_notifier(struct notifier_block *nb, unsigned int cpu = (unsigned long)hcpu; switch (action & ~CPU_TASKS_FROZEN) { - case CPU_UP_PREPARE: - intel_cqm_cpu_prepare(cpu); - break; case CPU_DOWN_PREPARE: intel_cqm_cpu_exit(cpu); break; case CPU_STARTING: + intel_cqm_cpu_starting(cpu); cqm_pick_event_reader(cpu); break; } @@ -1373,7 +1371,7 @@ static int __init intel_cqm_init(void) goto out; for_each_online_cpu(i) { - intel_cqm_cpu_prepare(i); + intel_cqm_cpu_starting(i); cqm_pick_event_reader(i); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index e20cfacb5a32..42169283448b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -79,7 +79,7 @@ static struct pt_cap_desc { static u32 pt_cap_get(enum pt_capabilities cap) { struct pt_cap_desc *cd = &pt_caps[cap]; - u32 c = pt_pmu.caps[cd->leaf * 4 + cd->reg]; + u32 c = pt_pmu.caps[cd->leaf * PT_CPUID_REGS_NUM + cd->reg]; unsigned int shift = __ffs(cd->mask); return (c & cd->mask) >> shift; @@ 
-145,10 +145,10 @@ static int __init pt_pmu_hw_init(void) for (i = 0; i < PT_CPUID_LEAVES; i++) { cpuid_count(20, i, - &pt_pmu.caps[CR_EAX + i*4], - &pt_pmu.caps[CR_EBX + i*4], - &pt_pmu.caps[CR_ECX + i*4], - &pt_pmu.caps[CR_EDX + i*4]); + &pt_pmu.caps[CR_EAX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CR_EBX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CR_ECX + i*PT_CPUID_REGS_NUM], + &pt_pmu.caps[CR_EDX + i*PT_CPUID_REGS_NUM]); } ret = -ENOMEM; diff --git a/arch/x86/kernel/cpu/perf_event_msr.c b/arch/x86/kernel/cpu/perf_event_msr.c index af216e9223e8..b0dd2e8a6d12 100644 --- a/arch/x86/kernel/cpu/perf_event_msr.c +++ b/arch/x86/kernel/cpu/perf_event_msr.c @@ -10,17 +10,63 @@ enum perf_msr_id { PERF_MSR_EVENT_MAX, }; +bool test_aperfmperf(int idx) +{ + return boot_cpu_has(X86_FEATURE_APERFMPERF); +} + +bool test_intel(int idx) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || + boot_cpu_data.x86 != 6) + return false; + + switch (boot_cpu_data.x86_model) { + case 30: /* 45nm Nehalem */ + case 26: /* 45nm Nehalem-EP */ + case 46: /* 45nm Nehalem-EX */ + + case 37: /* 32nm Westmere */ + case 44: /* 32nm Westmere-EP */ + case 47: /* 32nm Westmere-EX */ + + case 42: /* 32nm SandyBridge */ + case 45: /* 32nm SandyBridge-E/EN/EP */ + + case 58: /* 22nm IvyBridge */ + case 62: /* 22nm IvyBridge-EP/EX */ + + case 60: /* 22nm Haswell Core */ + case 63: /* 22nm Haswell Server */ + case 69: /* 22nm Haswell ULT */ + case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ + + case 61: /* 14nm Broadwell Core-M */ + case 86: /* 14nm Broadwell Xeon D */ + case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ + case 79: /* 14nm Broadwell Server */ + + case 55: /* 22nm Atom "Silvermont" */ + case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ + case 76: /* 14nm Atom "Airmont" */ + if (idx == PERF_MSR_SMI) + return true; + break; + + case 78: /* 14nm Skylake Mobile */ + case 94: /* 14nm Skylake Desktop */ + if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) + return true; + 
break; + } + + return false; +} + struct perf_msr { - int id; u64 msr; -}; - -static struct perf_msr msr[] = { - { PERF_MSR_TSC, 0 }, - { PERF_MSR_APERF, MSR_IA32_APERF }, - { PERF_MSR_MPERF, MSR_IA32_MPERF }, - { PERF_MSR_PPERF, MSR_PPERF }, - { PERF_MSR_SMI, MSR_SMI_COUNT }, + struct perf_pmu_events_attr *attr; + bool (*test)(int idx); }; PMU_EVENT_ATTR_STRING(tsc, evattr_tsc, "event=0x00"); @@ -29,8 +75,16 @@ PMU_EVENT_ATTR_STRING(mperf, evattr_mperf, "event=0x02"); PMU_EVENT_ATTR_STRING(pperf, evattr_pperf, "event=0x03"); PMU_EVENT_ATTR_STRING(smi, evattr_smi, "event=0x04"); +static struct perf_msr msr[] = { + [PERF_MSR_TSC] = { 0, &evattr_tsc, NULL, }, + [PERF_MSR_APERF] = { MSR_IA32_APERF, &evattr_aperf, test_aperfmperf, }, + [PERF_MSR_MPERF] = { MSR_IA32_MPERF, &evattr_mperf, test_aperfmperf, }, + [PERF_MSR_PPERF] = { MSR_PPERF, &evattr_pperf, test_intel, }, + [PERF_MSR_SMI] = { MSR_SMI_COUNT, &evattr_smi, test_intel, }, +}; + static struct attribute *events_attrs[PERF_MSR_EVENT_MAX + 1] = { - &evattr_tsc.attr.attr, + NULL, }; static struct attribute_group events_attr_group = { @@ -74,6 +128,9 @@ static int msr_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (!msr[cfg].attr) + return -EINVAL; + event->hw.idx = -1; event->hw.event_base = msr[cfg].msr; event->hw.config = cfg; @@ -151,89 +208,32 @@ static struct pmu pmu_msr = { .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; -static int __init intel_msr_init(int idx) -{ - if (boot_cpu_data.x86 != 6) - return 0; - - switch (boot_cpu_data.x86_model) { - case 30: /* 45nm Nehalem */ - case 26: /* 45nm Nehalem-EP */ - case 46: /* 45nm Nehalem-EX */ - - case 37: /* 32nm Westmere */ - case 44: /* 32nm Westmere-EP */ - case 47: /* 32nm Westmere-EX */ - - case 42: /* 32nm SandyBridge */ - case 45: /* 32nm SandyBridge-E/EN/EP */ - - case 58: /* 22nm IvyBridge */ - case 62: /* 22nm IvyBridge-EP/EX */ - - case 60: /* 22nm Haswell Core */ - case 63: /* 22nm Haswell Server 
*/ - case 69: /* 22nm Haswell ULT */ - case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */ - - case 61: /* 14nm Broadwell Core-M */ - case 86: /* 14nm Broadwell Xeon D */ - case 71: /* 14nm Broadwell + GT3e (Intel Iris Pro graphics) */ - case 79: /* 14nm Broadwell Server */ - events_attrs[idx++] = &evattr_smi.attr.attr; - break; - - case 78: /* 14nm Skylake Mobile */ - case 94: /* 14nm Skylake Desktop */ - events_attrs[idx++] = &evattr_pperf.attr.attr; - events_attrs[idx++] = &evattr_smi.attr.attr; - break; - - case 55: /* 22nm Atom "Silvermont" */ - case 76: /* 14nm Atom "Airmont" */ - case 77: /* 22nm Atom "Silvermont Avoton/Rangely" */ - events_attrs[idx++] = &evattr_smi.attr.attr; - break; - } - - events_attrs[idx] = NULL; - - return 0; -} - -static int __init amd_msr_init(int idx) -{ - return 0; -} - static int __init msr_init(void) { - int err; - int idx = 1; + int i, j = 0; - if (boot_cpu_has(X86_FEATURE_APERFMPERF)) { - events_attrs[idx++] = &evattr_aperf.attr.attr; - events_attrs[idx++] = &evattr_mperf.attr.attr; - events_attrs[idx] = NULL; + if (!boot_cpu_has(X86_FEATURE_TSC)) { + pr_cont("no MSR PMU driver.\n"); + return 0; } - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - err = intel_msr_init(idx); - break; - - case X86_VENDOR_AMD: - err = amd_msr_init(idx); - break; + /* Probe the MSRs. */ + for (i = PERF_MSR_TSC + 1; i < PERF_MSR_EVENT_MAX; i++) { + u64 val; - default: - err = -ENOTSUPP; + /* + * Virt sucks arse; you cannot tell if a R/O MSR is present :/ + */ + if (!msr[i].test(i) || rdmsrl_safe(msr[i].msr, &val)) + msr[i].attr = NULL; } - if (err != 0) { - pr_cont("no msr PMU driver.\n"); - return 0; + /* List remaining MSRs in the sysfs attrs. 
*/ + for (i = 0; i < PERF_MSR_EVENT_MAX; i++) { + if (msr[i].attr) + events_attrs[j++] = &msr[i].attr->attr.attr; } + events_attrs[j] = NULL; perf_pmu_register(&pmu_msr, "msr", -1); diff --git a/kernel/events/core.c b/kernel/events/core.c index 77f9e5d0e2d1..ae16867670a9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3972,28 +3972,21 @@ static void perf_event_for_each(struct perf_event *event, perf_event_for_each_child(sibling, func); } -static int perf_event_period(struct perf_event *event, u64 __user *arg) -{ - struct perf_event_context *ctx = event->ctx; - int ret = 0, active; +struct period_event { + struct perf_event *event; u64 value; +}; - if (!is_sampling_event(event)) - return -EINVAL; - - if (copy_from_user(&value, arg, sizeof(value))) - return -EFAULT; - - if (!value) - return -EINVAL; +static int __perf_event_period(void *info) +{ + struct period_event *pe = info; + struct perf_event *event = pe->event; + struct perf_event_context *ctx = event->ctx; + u64 value = pe->value; + bool active; - raw_spin_lock_irq(&ctx->lock); + raw_spin_lock(&ctx->lock); if (event->attr.freq) { - if (value > sysctl_perf_event_sample_rate) { - ret = -EINVAL; - goto unlock; - } - event->attr.sample_freq = value; } else { event->attr.sample_period = value; @@ -4012,11 +4005,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) event->pmu->start(event, PERF_EF_RELOAD); perf_pmu_enable(ctx->pmu); } + raw_spin_unlock(&ctx->lock); -unlock: + return 0; +} + +static int perf_event_period(struct perf_event *event, u64 __user *arg) +{ + struct period_event pe = { .event = event, }; + struct perf_event_context *ctx = event->ctx; + struct task_struct *task; + u64 value; + + if (!is_sampling_event(event)) + return -EINVAL; + + if (copy_from_user(&value, arg, sizeof(value))) + return -EFAULT; + + if (!value) + return -EINVAL; + + if (event->attr.freq && value > sysctl_perf_event_sample_rate) + return -EINVAL; + + task = ctx->task; + pe.value = 
value; + + if (!task) { + cpu_function_call(event->cpu, __perf_event_period, &pe); + return 0; + } + +retry: + if (!task_function_call(task, __perf_event_period, &pe)) + return 0; + + raw_spin_lock_irq(&ctx->lock); + if (ctx->is_active) { + raw_spin_unlock_irq(&ctx->lock); + task = ctx->task; + goto retry; + } + + __perf_event_period(&pe); raw_spin_unlock_irq(&ctx->lock); - return ret; + return 0; } static const struct file_operations perf_fops; @@ -4754,12 +4789,20 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); if (event->pending_kill) { - kill_fasync(&event->fasync, SIGIO, event->pending_kill); + kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); event->pending_kill = 0; } } @@ -6221,7 +6264,7 @@ static int __perf_event_overflow(struct perf_event *event, else perf_event_output(event, data, regs); - if (event->fasync && event->pending_kill) { + if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; irq_work_queue(&event->pending); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index b2be01b1aa9d..182bc30899d5 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -437,7 +437,10 @@ static struct page *rb_alloc_aux_page(int node, int order) if (page && order) { /* - * Communicate the allocation size to the driver + * Communicate the allocation size to the driver: + * if we managed to secure a high-order allocation, + * set its first page's private to this order; + * !PagePrivate(page) means it's just a normal page. 
*/ split_page(page, order); SetPagePrivate(page); @@ -559,11 +562,13 @@ static void __rb_free_aux(struct ring_buffer *rb) rb->aux_priv = NULL; } - for (pg = 0; pg < rb->aux_nr_pages; pg++) - rb_free_aux_page(rb, pg); + if (rb->aux_nr_pages) { + for (pg = 0; pg < rb->aux_nr_pages; pg++) + rb_free_aux_page(rb, pg); - kfree(rb->aux_pages); - rb->aux_nr_pages = 0; + kfree(rb->aux_pages); + rb->aux_nr_pages = 0; + } } void rb_free_aux(struct ring_buffer *rb) |