diff options
47 files changed, 994 insertions, 131 deletions
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h index 99cfe3607989..7523340afb8a 100644 --- a/arch/arm/include/asm/perf_event.h +++ b/arch/arm/include/asm/perf_event.h @@ -12,10 +12,6 @@ #ifndef __ARM_PERF_EVENT_H__ #define __ARM_PERF_EVENT_H__ -/* ARM performance counters start from 1 (in the cp15 accesses) so use the - * same indexes here for consistency. */ -#define PERF_EVENT_INDEX_OFFSET 1 - /* ARM perf PMU IDs for use by internal perf clients. */ enum arm_perf_pmu_ids { ARM_PERF_PMU_ID_XSCALE1 = 0, diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h index a69e0155d146..c52ea5546b5b 100644 --- a/arch/frv/include/asm/perf_event.h +++ b/arch/frv/include/asm/perf_event.h @@ -12,6 +12,4 @@ #ifndef _ASM_PERF_EVENT_H #define _ASM_PERF_EVENT_H -#define PERF_EVENT_INDEX_OFFSET 0 - #endif /* _ASM_PERF_EVENT_H */ diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h index 6c2910f91180..8b8526b491c7 100644 --- a/arch/hexagon/include/asm/perf_event.h +++ b/arch/hexagon/include/asm/perf_event.h @@ -19,6 +19,4 @@ #ifndef _ASM_PERF_EVENT_H #define _ASM_PERF_EVENT_H -#define PERF_EVENT_INDEX_OFFSET 0 - #endif /* _ASM_PERF_EVENT_H */ diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 8f1df1208d23..1a8093fa8f71 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -61,8 +61,6 @@ struct pt_regs; extern unsigned long perf_misc_flags(struct pt_regs *regs); extern unsigned long perf_instruction_pointer(struct pt_regs *regs); -#define PERF_EVENT_INDEX_OFFSET 1 - /* * Only override the default definitions in include/linux/perf_event.h * if we have hardware PMU support. diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 10a140f82cb8..d614ab57ccca 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1187,6 +1187,11 @@ static int power_pmu_event_init(struct perf_event *event) return err; } +static int power_pmu_event_idx(struct perf_event *event) +{ + return event->hw.idx; +} + struct pmu power_pmu = { .pmu_enable = power_pmu_enable, .pmu_disable = power_pmu_disable, @@ -1199,6 +1204,7 @@ struct pmu power_pmu = { .start_txn = power_pmu_start_txn, .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, + .event_idx = power_pmu_event_idx, }; /* diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index a75f168d2718..4eb444edbe49 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -6,4 +6,3 @@ /* Empty, just to avoid compiling error */ -#define PERF_EVENT_INDEX_OFFSET 0 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 096c975e099f..9b922c136254 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void); #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); -#define PERF_EVENT_INDEX_OFFSET 0 - /* * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. * This flag is otherwise unused and ABI specified to be 0, so nobody should diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5adce1040b11..f8bddb5b0600 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <linux/cpu.h> #include <linux/bitops.h> +#include <linux/device.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -31,6 +32,7 @@ #include <asm/compat.h> #include <asm/smp.h> #include <asm/alternative.h> +#include <asm/timer.h> #include "perf_event.h" @@ -1210,6 +1212,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) break; case CPU_STARTING: + if (x86_pmu.attr_rdpmc) + set_in_cr4(X86_CR4_PCE); if (x86_pmu.cpu_starting) x86_pmu.cpu_starting(cpu); break; @@ -1319,6 +1323,8 @@ static int __init init_hw_perf_events(void) } } + x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); @@ -1542,10 +1548,71 @@ static int x86_pmu_event_init(struct perf_event *event) return err; } +static int x86_pmu_event_idx(struct perf_event *event) +{ + int idx = event->hw.idx; + + if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) { + idx -= X86_PMC_IDX_FIXED; + idx |= 1 << 30; + } + + return idx + 1; +} + +static ssize_t get_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); +} + +static void change_rdpmc(void *info) +{ + bool enable = !!(unsigned long)info; + + if (enable) + set_in_cr4(X86_CR4_PCE); + else + clear_in_cr4(X86_CR4_PCE); +} + +static ssize_t set_attr_rdpmc(struct device *cdev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned long val = simple_strtoul(buf, NULL, 0); + + if (!!val != !!x86_pmu.attr_rdpmc) { + x86_pmu.attr_rdpmc = !!val; + smp_call_function(change_rdpmc, (void *)val, 1); + } + + return count; +} + +static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc); + +static struct attribute *x86_pmu_attrs[] = { + &dev_attr_rdpmc.attr, + NULL, +}; + +static struct attribute_group x86_pmu_attr_group = { + .attrs = x86_pmu_attrs, +}; + +static const struct attribute_group *x86_pmu_attr_groups[] = { + &x86_pmu_attr_group, + NULL, +}; + static struct pmu pmu = { .pmu_enable = x86_pmu_enable, .pmu_disable = x86_pmu_disable, + .attr_groups = x86_pmu_attr_groups, + .event_init = x86_pmu_event_init, .add = x86_pmu_add, @@ -1557,8 +1624,23 @@ static struct pmu pmu = { .start_txn = x86_pmu_start_txn, .cancel_txn = x86_pmu_cancel_txn, .commit_txn = x86_pmu_commit_txn, + + .event_idx = x86_pmu_event_idx, }; +void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +{ + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return; + + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + return; + + userpg->time_mult = this_cpu_read(cyc2ns); + userpg->time_shift = CYC2NS_SCALE_FACTOR; + userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; +} + /* * callchain support */ diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8944062f46e2..513d617b93c4 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -307,6 +307,14 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; + /* + * sysfs attrs + */ + int attr_rdpmc; + + /* + * CPU Hotplug hooks + */ int (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 5ce8b140428f..f7c69580fea7 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -1,6 +1,38 @@ #ifndef _LINUX_JUMP_LABEL_H #define _LINUX_JUMP_LABEL_H +/* + * Jump label support + * + * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com> + * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com> + * + * Jump labels provide an interface to generate dynamic branches using + * self-modifying code. Assuming toolchain and architecture support the result + * of a "if (static_branch(&key))" statement is a unconditional branch (which + * defaults to false - and the true block is placed out of line). + * + * However at runtime we can change the 'static' branch target using + * jump_label_{inc,dec}(). These function as a 'reference' count on the key + * object and for as long as there are references all branches referring to + * that particular key will point to the (out of line) true block. + * + * Since this relies on modifying code the jump_label_{inc,dec}() functions + * must be considered absolute slow paths (machine wide synchronization etc.). + * OTOH, since the affected branches are unconditional their runtime overhead + * will be absolutely minimal, esp. in the default (off) case where the total + * effect is a single NOP of appropriate size. The on case will patch in a jump + * to the out-of-line block. + * + * When the control is directly exposed to userspace it is prudent to delay the + * decrement to avoid high frequency code modifications which can (and do) + * cause significant performance degradation. Struct jump_label_key_deferred and + * jump_label_dec_deferred() provide for this. + * + * Lacking toolchain and or architecture support, it falls back to a simple + * conditional branch. + */ + #include <linux/types.h> #include <linux/compiler.h> #include <linux/workqueue.h> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index abb2776be1ba..412b790f5da6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -291,12 +291,14 @@ struct perf_event_mmap_page { __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ __u64 time_running; /* time event on cpu */ + __u32 time_mult, time_shift; + __u64 time_offset; /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[123]; /* align to 1k */ + __u64 __reserved[121]; /* align to 1k */ /* * Control data for the mmap() data buffer. @@ -616,6 +618,7 @@ struct pmu { struct list_head entry; struct device *dev; + const struct attribute_group **attr_groups; char *name; int type; @@ -681,6 +684,12 @@ struct pmu { * for each successful ->add() during the transaction. */ void (*cancel_txn) (struct pmu *pmu); /* optional */ + + /* + * Will return the value for perf_event_mmap_page::index for this event, + * if no implementation is provided it will default to: event->hw.idx + 1. + */ + int (*event_idx) (struct perf_event *event); /*optional */ }; /** diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h index 17df43464df0..39a8a430d90f 100644 --- a/include/trace/events/signal.h +++ b/include/trace/events/signal.h @@ -23,11 +23,23 @@ } \ } while (0) +#ifndef TRACE_HEADER_MULTI_READ +enum { + TRACE_SIGNAL_DELIVERED, + TRACE_SIGNAL_IGNORED, + TRACE_SIGNAL_ALREADY_PENDING, + TRACE_SIGNAL_OVERFLOW_FAIL, + TRACE_SIGNAL_LOSE_INFO, +}; +#endif + /** * signal_generate - called when a signal is generated * @sig: signal number * @info: pointer to struct siginfo * @task: pointer to struct task_struct + * @group: shared or private + * @result: TRACE_SIGNAL_* * * Current process sends a 'sig' signal to 'task' process with * 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV, @@ -37,9 +49,10 @@ */ TRACE_EVENT(signal_generate, - TP_PROTO(int sig, struct siginfo *info, struct task_struct *task), + TP_PROTO(int sig, struct siginfo *info, struct task_struct *task, + int group, int result), - TP_ARGS(sig, info, task), + TP_ARGS(sig, info, task, group, result), TP_STRUCT__entry( __field( int, sig ) @@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate, __field( int, code ) __array( char, comm, TASK_COMM_LEN ) __field( pid_t, pid ) + __field( int, group ) + __field( int, result ) ), TP_fast_assign( @@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate, TP_STORE_SIGINFO(__entry, info); memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->pid = task->pid; + __entry->group = group; + __entry->result = result; ), - TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d", + TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d", __entry->sig, __entry->errno, __entry->code, - __entry->comm, __entry->pid) + __entry->comm, __entry->pid, __entry->group, + __entry->result) ); /** @@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver, __entry->sa_handler, __entry->sa_flags) ); -DECLARE_EVENT_CLASS(signal_queue_overflow, - - TP_PROTO(int sig, int group, struct siginfo *info), - - TP_ARGS(sig, group, info), - - TP_STRUCT__entry( - __field( int, sig ) - __field( int, group ) - __field( int, errno ) - __field( int, code ) - ), - - TP_fast_assign( - __entry->sig = sig; - __entry->group = group; - TP_STORE_SIGINFO(__entry, info); - ), - - TP_printk("sig=%d group=%d errno=%d code=%d", - __entry->sig, __entry->group, __entry->errno, __entry->code) -); - -/** - * signal_overflow_fail - called when signal queue is overflow - * @sig: signal number - * @group: signal to process group or not (bool) - * @info: pointer to struct siginfo - * - * Kernel fails to generate 'sig' signal with 'info' siginfo, because - * siginfo queue is overflow, and the signal is dropped. - * 'group' is not 0 if the signal will be sent to a process group. - * 'sig' is always one of RT signals. - */ -DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail, - - TP_PROTO(int sig, int group, struct siginfo *info), - - TP_ARGS(sig, group, info) -); - -/** - * signal_lose_info - called when siginfo is lost - * @sig: signal number - * @group: signal to process group or not (bool) - * @info: pointer to struct siginfo - * - * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo - * queue is overflow. - * 'group' is not 0 if the signal will be sent to a process group. - * 'sig' is always one of non-RT signals. - */ -DEFINE_EVENT(signal_queue_overflow, signal_lose_info, - - TP_PROTO(int sig, int group, struct siginfo *info), - - TP_ARGS(sig, group, info) -); - #endif /* _TRACE_SIGNAL_H */ /* This part must be outside protection */ diff --git a/kernel/events/core.c b/kernel/events/core.c index ba36013cfb21..7c3b9de55f6b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3229,10 +3229,6 @@ int perf_event_task_disable(void) return 0; } -#ifndef PERF_EVENT_INDEX_OFFSET -# define PERF_EVENT_INDEX_OFFSET 0 -#endif - static int perf_event_index(struct perf_event *event) { if (event->hw.state & PERF_HES_STOPPED) @@ -3241,21 +3237,26 @@ static int perf_event_index(struct perf_event *event) if (event->state != PERF_EVENT_STATE_ACTIVE) return 0; - return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; + return event->pmu->event_idx(event); } static void calc_timer_values(struct perf_event *event, + u64 *now, u64 *enabled, u64 *running) { - u64 now, ctx_time; + u64 ctx_time; - now = perf_clock(); - ctx_time = event->shadow_ctx_time + now; + *now = perf_clock(); + ctx_time = event->shadow_ctx_time + *now; *enabled = ctx_time - event->tstamp_enabled; *running = ctx_time - event->tstamp_running; } +void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) +{ +} + /* * Callers need to ensure there can be no nesting of this function, otherwise * the seqlock logic goes bad. We can not serialize this because the arch @@ -3265,7 +3266,7 @@ void perf_event_update_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; struct ring_buffer *rb; - u64 enabled, running; + u64 enabled, running, now; rcu_read_lock(); /* @@ -3277,7 +3278,7 @@ void perf_event_update_userpage(struct perf_event *event) * because of locking issue as we can be called in * NMI context */ - calc_timer_values(event, &enabled, &running); + calc_timer_values(event, &now, &enabled, &running); rb = rcu_dereference(event->rb); if (!rb) goto unlock; @@ -3293,7 +3294,7 @@ void perf_event_update_userpage(struct perf_event *event) barrier(); userpg->index = perf_event_index(event); userpg->offset = perf_event_count(event); - if (event->state == PERF_EVENT_STATE_ACTIVE) + if (userpg->index) userpg->offset -= local64_read(&event->hw.prev_count); userpg->time_enabled = enabled + @@ -3302,6 +3303,8 @@ void perf_event_update_userpage(struct perf_event *event) userpg->time_running = running + atomic64_read(&event->child_total_time_running); + perf_update_user_clock(userpg, now); + barrier(); ++userpg->lock; preempt_enable(); @@ -3559,6 +3562,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) event->mmap_user = get_current_user(); vma->vm_mm->pinned_vm += event->mmap_locked; + perf_event_update_userpage(event); + unlock: if (!ret) atomic_inc(&event->mmap_count); @@ -3790,7 +3795,7 @@ static void perf_output_read_group(struct perf_output_handle *handle, static void perf_output_read(struct perf_output_handle *handle, struct perf_event *event) { - u64 enabled = 0, running = 0; + u64 enabled = 0, running = 0, now; u64 read_format = event->attr.read_format; /* @@ -3803,7 +3808,7 @@ static void perf_output_read(struct perf_output_handle *handle, * NMI context */ if (read_format & PERF_FORMAT_TOTAL_TIMES) - calc_timer_values(event, &enabled, &running); + calc_timer_values(event, &now, &enabled, &running); if (event->attr.read_format & PERF_FORMAT_GROUP) perf_output_read_group(handle, event, enabled, running); @@ -5022,6 +5027,11 @@ static int perf_swevent_init(struct perf_event *event) return 0; } +static int perf_swevent_event_idx(struct perf_event *event) +{ + return 0; +} + static struct pmu perf_swevent = { .task_ctx_nr = perf_sw_context, @@ -5031,6 +5041,8 @@ static struct pmu perf_swevent = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, + + .event_idx = perf_swevent_event_idx, }; #ifdef CONFIG_EVENT_TRACING @@ -5117,6 +5129,8 @@ static struct pmu perf_tracepoint = { .start = perf_swevent_start, .stop = perf_swevent_stop, .read = perf_swevent_read, + + .event_idx = perf_swevent_event_idx, }; static inline void perf_tp_register(void) @@ -5336,6 +5350,8 @@ static struct pmu perf_cpu_clock = { .start = cpu_clock_event_start, .stop = cpu_clock_event_stop, .read = cpu_clock_event_read, + + .event_idx = perf_swevent_event_idx, }; /* @@ -5408,6 +5424,8 @@ static struct pmu perf_task_clock = { .start = task_clock_event_start, .stop = task_clock_event_stop, .read = task_clock_event_read, + + .event_idx = perf_swevent_event_idx, }; static void perf_pmu_nop_void(struct pmu *pmu) @@ -5435,6 +5453,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) perf_pmu_enable(pmu); } +static int perf_event_idx_default(struct perf_event *event) +{ + return event->hw.idx + 1; +} + /* * Ensures all contexts with the same task_ctx_nr have the same * pmu_cpu_context too. @@ -5521,6 +5544,7 @@ static int pmu_dev_alloc(struct pmu *pmu) if (!pmu->dev) goto out; + pmu->dev->groups = pmu->attr_groups; device_initialize(pmu->dev); ret = dev_set_name(pmu->dev, "%s", pmu->name); if (ret) @@ -5624,6 +5648,9 @@ got_cpu_context: pmu->pmu_disable = perf_pmu_nop_void; } + if (!pmu->event_idx) + pmu->event_idx = perf_event_idx_default; + list_add_rcu(&pmu->entry, &pmus); ret = 0; unlock: diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index b7971d6f38bf..b0309f76d777 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -613,6 +613,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) bp->hw.state = PERF_HES_STOPPED; } +static int hw_breakpoint_event_idx(struct perf_event *bp) +{ + return 0; +} + static struct pmu perf_breakpoint = { .task_ctx_nr = perf_sw_context, /* could eventually get its own */ @@ -622,6 +627,8 @@ static struct pmu perf_breakpoint = { .start = hw_breakpoint_start, .stop = hw_breakpoint_stop, .read = hw_breakpoint_pmu_read, + + .event_idx = hw_breakpoint_event_idx, }; int __init init_hw_breakpoint(void) diff --git a/kernel/signal.c b/kernel/signal.c index c73c4284160e..8511e39813c7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, struct sigpending *pending; struct sigqueue *q; int override_rlimit; - - trace_signal_generate(sig, info, t); + int ret = 0, result; assert_spin_locked(&t->sighand->siglock); + result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, from_ancestor_ns)) - return 0; + goto ret; pending = group ? &t->signal->shared_pending : &t->pending; /* @@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, * exactly one non-rt signal, so that we can get more * detailed information about the cause of the signal. */ + result = TRACE_SIGNAL_ALREADY_PENDING; if (legacy_queue(pending, sig)) - return 0; + goto ret; + + result = TRACE_SIGNAL_DELIVERED; /* * fast-pathed signals for kernel-internal things like SIGSTOP * or SIGKILL. @@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, * signal was rt and sent by user using something * other than kill(). */ - trace_signal_overflow_fail(sig, group, info); - return -EAGAIN; + result = TRACE_SIGNAL_OVERFLOW_FAIL; + ret = -EAGAIN; + goto ret; } else { /* * This is a silent loss of information. We still * send the signal, but the *info bits are lost. */ - trace_signal_lose_info(sig, group, info); + result = TRACE_SIGNAL_LOSE_INFO; } } @@ -1142,7 +1146,9 @@ out_set: signalfd_notify(t, sig); sigaddset(&pending->signal, sig); complete_signal(sig, t, group); - return 0; +ret: + trace_signal_generate(sig, info, t, group, result); + return ret; } static int send_signal(int sig, struct siginfo *info, struct task_struct *t, @@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) int sig = q->info.si_signo; struct sigpending *pending; unsigned long flags; - int ret; + int ret, result; BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); @@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) goto ret; ret = 1; /* the signal is ignored */ + result = TRACE_SIGNAL_IGNORED; if (!prepare_signal(sig, t, 0)) goto out; @@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) */ BUG_ON(q->info.si_code != SI_TIMER); q->info.si_overrun++; + result = TRACE_SIGNAL_ALREADY_PENDING; goto out; } q->info.si_overrun = 0; @@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) list_add_tail(&q->list, &pending->list); sigaddset(&pending->signal, sig); complete_signal(sig, t, group); + result = TRACE_SIGNAL_DELIVERED; out: + trace_signal_generate(sig, &q->info, t, group, result); unlock_task_sighand(t, &flags); ret: return ret; diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 2937f7e14bb7..ff9a66e0d4e4 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -58,6 +58,10 @@ OPTIONS --tid=:: Record events on existing thread ID. +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. + -r:: --realtime=:: Collect data with this RT SCHED_FIFO priority. diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index b1a5bbbfebef..ab1454ed450f 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -78,6 +78,10 @@ Default is to monitor all CPUS. --tid=<tid>:: Profile events on existing thread ID. +-u:: +--uid=:: + Record events in threads owned by uid. Name or number. + -r <priority>:: --realtime=<priority>:: Collect data with this RT SCHED_FIFO priority. diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7c12650165ae..d64f5819a380 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64) ifeq (${IS_X86_64}, 1) RAW_ARCH := x86_64 ARCH_CFLAGS := -DARCH_X86_64 - ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S + ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif endif @@ -359,8 +359,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o ifeq ($(RAW_ARCH),x86_64) BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o endif BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o +BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o BUILTIN_OBJS += $(OUTPUT)builtin-diff.o BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index f7781c6267c0..a09bece6dad2 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -4,6 +4,7 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); +extern int bench_mem_memset(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d588b87696fc..d66ab799b35f 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -2,3 +2,11 @@ MEMCPY_FN(__memcpy, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c, + "x86-64-movsq", + "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") + +MEMCPY_FN(memcpy_c_e, + "x86-64-movsb", + "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index a57b66e853c2..a20780bd0771 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,2 +1,6 @@ - +#define memcpy MEMCPY /* don't hide glibc's memcpy() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemcpy_c globl memcpy_c; memcpy_c +#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e #include "../../../arch/x86/lib/memcpy_64.S" diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db82021f4b91..6ad2b1c6b27b 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -24,6 +24,7 @@ static const char *length_str = "1MB"; static const char *routine = "default"; +static int iterations = 1; static bool use_clock; static int clock_fd; static bool only_prefault; @@ -35,6 +36,8 @@ static const struct option options[] = { "available unit: B, MB, GB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", "Specify routine to copy"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "clock", &use_clock, "Use CPU clock for measuring"), OPT_BOOLEAN('o', "only-prefault", &only_prefault, @@ -121,6 +124,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) { u64 clock_start = 0ULL, clock_end = 0ULL; void *src = NULL, *dst = NULL; + int i; alloc_mem(&src, &dst, len); @@ -128,7 +132,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault) fn(dst, src, len); clock_start = get_clock(); - fn(dst, src, len); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); clock_end = get_clock(); free(src); @@ -140,6 +145,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) { struct timeval tv_start, tv_end, tv_diff; void *src = NULL, *dst = NULL; + int i; alloc_mem(&src, &dst, len); @@ -147,7 +153,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) fn(dst, src, len); BUG_ON(gettimeofday(&tv_start, NULL)); - fn(dst, src, len); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); BUG_ON(gettimeofday(&tv_end, NULL)); timersub(&tv_end, &tv_start, &tv_diff); diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h new file mode 100644 index 000000000000..a040fa77665b --- /dev/null +++ b/tools/perf/bench/mem-memset-arch.h @@ -0,0 +1,12 @@ + +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) \ + extern void *fn(void *, int, size_t); + +#include "mem-memset-x86-64-asm-def.h" + +#undef MEMSET_FN + +#endif + diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h new file mode 100644 index 000000000000..a71dff97c1f5 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -0,0 +1,12 @@ + +MEMSET_FN(__memset, + "x86-64-unrolled", + "unrolled memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c, + "x86-64-stosq", + "movsq-based memset() in arch/x86/lib/memset_64.S") + +MEMSET_FN(memset_c_e, + "x86-64-stosb", + "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S new file mode 100644 index 000000000000..cb9217063776 --- /dev/null +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -0,0 +1,6 @@ +#define memset MEMSET /* don't hide glibc's memset() */ +#define altinstr_replacement text +#define globl p2align 4; .globl +#define Lmemset_c globl memset_c; memset_c +#define Lmemset_c_e globl memset_c_e; memset_c_e +#include "../../../arch/x86/lib/memset_64.S" diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c new file mode 100644 index 000000000000..59d4933eff44 --- /dev/null +++ b/tools/perf/bench/mem-memset.c @@ -0,0 +1,298 @@ +/* + * mem-memset.c + * + * memset: Simple memory set in various ways + * + * Trivial clone of mem-memcpy.c. + */ +#include <ctype.h> + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "mem-memset-arch.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int iterations = 1; +static bool use_clock; +static int clock_fd; +static bool only_prefault; +static bool no_prefault; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memset() invocation this number of times"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memset()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memset()"), + OPT_END() +}; + +typedef void *(*memset_t)(void *, int, size_t); + +struct routine { + const char *name; + const char *desc; + memset_t fn; +}; + +static const struct routine routines[] = { + { "default", + "Default memset() provided by glibc", + memset }, +#ifdef ARCH_X86_64 + +#define MEMSET_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memset-x86-64-asm-def.h" +#undef MEMSET_FN + +#endif + + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memset_usage[] = { + "perf bench mem memset <options>", + NULL +}; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +static void alloc_mem(void **dst, size_t length) +{ + *dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); +} + +static u64 do_memset_clock(memset_t fn, size_t len, bool prefault) +{ + u64 clock_start = 0ULL, clock_end = 0ULL; + void *dst = NULL; + int i; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + clock_start = get_clock(); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + clock_end = get_clock(); + + free(dst); + return clock_end - clock_start; +} + +static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault) +{ + struct timeval tv_start, tv_end, tv_diff; + void *dst = NULL; + int i; + + alloc_mem(&dst, len); + + if (prefault) + fn(dst, -1, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, i, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +int bench_mem_memset(int argc, const char **argv, + const char *prefix __used) +{ + int i; + size_t len; + double result_bps[2]; + u64 result_clock[2]; + + argc = parse_options(argc, argv, options, + bench_mem_memset_usage, 0); + + if (use_clock) + init_clock(); + + len = (size_t)perf_atoll((char *)length_str); + + result_clock[0] = result_clock[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); + + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_clock) { + result_clock[0] = + do_memset_clock(routines[i].fn, len, false); + result_clock[1] = + do_memset_clock(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memset_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memset_gettimeofday(routines[i].fn, + len, true); + } + } else { + if (use_clock) { + result_clock[pf] = + do_memset_clock(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memset_gettimeofday(routines[i].fn, + len, only_prefault); + } + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)result_clock[0] + / (double)len); + printf(" %14lf Clock/Byte (with prefault)\n ", + (double)result_clock[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); + } + } else { + if (use_clock) { + printf(" %14lf Clock/Byte", + (double)result_clock[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); + } + break; + case BENCH_FORMAT_SIMPLE: + if (!only_prefault && !no_prefault) { + if (use_clock) { + printf("%lf %lf\n", + (double)result_clock[0] / (double)len, + (double)result_clock[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_clock) { + printf("%lf\n", (double)result_clock[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index fcb96269852a..b0e74ab2d7a2 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = { { "memcpy", "Simple memory copy in various ways", bench_mem_memcpy }, + { "memset", + "Simple memory set in various ways", + bench_mem_memset }, suite_all, { NULL, NULL, diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0abfb18b911f..32870eef952f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -44,6 +44,7 @@ struct perf_record { struct perf_evlist *evlist; struct perf_session *session; const char *progname; + const char *uid_str; int output; unsigned int page_size; int realtime_prio; @@ -727,6 +728,7 @@ const struct option record_options[] = { OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), + OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), OPT_END() }; @@ -748,7 +750,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) argc = parse_options(argc, argv, record_options, record_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 && - !rec->opts.system_wide && !rec->opts.cpu_list) + !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str) usage_with_options(record_usage, record_options); if (rec->force && rec->append_file) { @@ -788,11 +790,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) goto out_symbol_exit; } + rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid, + rec->opts.target_pid); + if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1) + goto out_free_fd; + if (rec->opts.target_pid != -1) rec->opts.target_tid = rec->opts.target_pid; if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid, - rec->opts.target_tid, rec->opts.cpu_list) < 0) + rec->opts.target_tid, rec->opts.uid, + rec->opts.cpu_list) < 0) usage_with_options(record_usage, record_options); list_for_each_entry(pos, &evsel_list->entries, node) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f5d2a63eba66..459b8620a5d9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1201,7 +1201,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) if (target_pid != -1) target_tid = target_pid; - evsel_list->threads = thread_map__new(target_pid, target_tid); + evsel_list->threads = thread_map__new(target_pid, target_tid, UINT_MAX); if (evsel_list->threads == NULL) { pr_err("Problems finding threads of monitor\n"); usage_with_options(stat_usage, options); diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 3854e869dce1..70c4eb2bdf72 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -15,6 +15,8 @@ #include "util/thread_map.h" #include "../../include/linux/hw_breakpoint.h" +#include <sys/mman.h> + static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym) { bool *visited = symbol__priv(sym); @@ -276,7 +278,7 @@ static int test__open_syscall_event(void) return -1; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void) return -1; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -490,7 +492,7 @@ static int test__basic_mmap(void) expected_nr_events[i] = random() % 257; } - threads = thread_map__new(-1, getpid()); + threads = thread_map__new(-1, getpid(), UINT_MAX); if (threads == NULL) { pr_debug("thread_map__new\n"); return -1; @@ -1054,7 +1056,7 @@ static int test__PERF_RECORD(void) * we're monitoring, the one forked there. */ err = perf_evlist__create_maps(evlist, opts.target_pid, - opts.target_tid, opts.cpu_list); + opts.target_tid, UINT_MAX, opts.cpu_list); if (err < 0) { pr_debug("Not enough memory to create thread/cpu maps\n"); goto out_delete_evlist; @@ -1296,6 +1298,173 @@ out: return (err < 0 || errs > 0) ? -1 : 0; } + +#if defined(__x86_64__) || defined(__i386__) + +#define barrier() asm volatile("" ::: "memory") + +static u64 rdpmc(unsigned int counter) +{ + unsigned int low, high; + + asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); + + return low | ((u64)high) << 32; +} + +static u64 rdtsc(void) +{ + unsigned int low, high; + + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((u64)high) << 32; +} + +static u64 mmap_read_self(void *addr) +{ + struct perf_event_mmap_page *pc = addr; + u32 seq, idx, time_mult = 0, time_shift = 0; + u64 count, cyc = 0, time_offset = 0, enabled, running, delta; + + do { + seq = pc->lock; + barrier(); + + enabled = pc->time_enabled; + running = pc->time_running; + + if (enabled != running) { + cyc = rdtsc(); + time_mult = pc->time_mult; + time_shift = pc->time_shift; + time_offset = pc->time_offset; + } + + idx = pc->index; + count = pc->offset; + if (idx) + count += rdpmc(idx - 1); + + barrier(); + } while (pc->lock != seq); + + if (enabled != running) { + u64 quot, rem; + + quot = (cyc >> time_shift); + rem = cyc & ((1 << time_shift) - 1); + delta = time_offset + quot * time_mult + + ((rem * time_mult) >> time_shift); + + enabled += delta; + if (idx) + running += delta; + + quot = count / running; + rem = count % running; + count = quot * enabled + (rem * enabled) / running; + } + + return count; +} + +/* + * If the RDPMC instruction faults then signal this back to the test parent task: + */ +static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used) +{ + exit(-1); +} + +static int __test__rdpmc(void) +{ + long page_size = sysconf(_SC_PAGE_SIZE); + volatile int tmp = 0; + u64 i, loops = 1000; + int n; + int fd; + void *addr; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_INSTRUCTIONS, + .exclude_kernel = 1, + }; + u64 delta_sum = 0; + struct sigaction sa; + + sigfillset(&sa.sa_mask); + sa.sa_sigaction = segfault_handler; + sigaction(SIGSEGV, &sa, NULL); + + fprintf(stderr, "\n\n"); + + fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (fd < 0) { + die("Error: sys_perf_event_open() syscall returned " + "with %d (%s)\n", fd, strerror(errno)); + } + + addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0); + if (addr == (void *)(-1)) { + die("Error: mmap() syscall returned " + "with (%s)\n", strerror(errno)); + } + + for (n = 0; n < 6; n++) { + u64 stamp, now, delta; + + stamp = mmap_read_self(addr); + + for (i = 0; i < loops; i++) + tmp++; + + now = mmap_read_self(addr); + loops *= 10; + + delta = now - stamp; + fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta); + + delta_sum += delta; + } + + munmap(addr, page_size); + close(fd); + + fprintf(stderr, " "); + + if (!delta_sum) + return -1; + + return 0; +} + +static int test__rdpmc(void) +{ + int status = 0; + int wret = 0; + int ret; + int pid; + + pid = fork(); + if (pid < 0) + return -1; + + if (!pid) { + ret = __test__rdpmc(); + + exit(ret); + } + + wret = waitpid(pid, &status, 0); + if (wret < 0 || status) + return -1; + + return 0; +} + +#endif + static struct test { const char *desc; int (*func)(void); @@ -1320,6 +1489,12 @@ static struct test { .desc = "parse events tests", .func = test__parse_events, }, +#if defined(__x86_64__) || defined(__i386__) + { + .desc = "x86 rdpmc test", + .func = test__rdpmc, + }, +#endif { .desc = "Validate PERF_RECORD_* events & perf_sample fields", .func = test__PERF_RECORD, @@ -1412,7 +1587,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used) if (symbol__init() < 0) return -1; - setup_pager(); - return __cmd_test(argc, argv); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index dd162aa24baa..d869b214ada2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -64,7 +64,6 @@ #include <linux/unistd.h> #include <linux/types.h> - void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg) static void *display_thread_tui(void *arg) { + struct perf_evsel *pos; struct perf_top *top = arg; const char *help = "For a higher level overview, try: perf top --sort comm,dso"; perf_top__sort_new_samples(top); + + /* + * Initialize the uid_filter_str, in the future the TUI will allow + * Zooming in/out UIDs. For now juse use whatever the user passed + * via --uid. + */ + list_for_each_entry(pos, &top->evlist->entries, node) + pos->hists.uid_filter_str = top->uid_str; + perf_evlist__tui_browse_hists(top->evlist, help, perf_top__sort_new_samples, top, top->delay_secs); @@ -956,7 +965,7 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; - if (top->target_tid != -1) + if (top->target_tid != -1 || top->uid != UINT_MAX) perf_event__synthesize_thread_map(&top->tool, top->evlist->threads, perf_event__process, &top->session->host_machine); @@ -1096,6 +1105,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) .delay_secs = 2, .target_pid = -1, .target_tid = -1, + .uid = UINT_MAX, .freq = 1000, /* 1 KHz */ .sample_id_all_avail = true, .mmap_pages = 128, @@ -1169,6 +1179,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) "Display raw encoding of assembly instructions (default)"), OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), + OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"), OPT_END() }; @@ -1194,6 +1205,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) setup_browser(false); + top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid); + if (top.uid_str != NULL && top.uid == UINT_MAX - 1) + goto out_delete_evlist; + /* CPU and PID are mutually exclusive */ if (top.target_tid > 0 && top.cpu_list) { printf("WARNING: PID switch overriding CPU\n"); @@ -1205,7 +1220,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) top.target_tid = top.target_pid; if (perf_evlist__create_maps(top.evlist, top.target_pid, - top.target_tid, top.cpu_list) < 0) + top.target_tid, top.uid, top.cpu_list) < 0) usage_with_options(top_usage, options); if (!top.evlist->nr_entries && @@ -1269,6 +1284,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) status = __cmd_top(&top); +out_delete_evlist: perf_evlist__delete(top.evlist); return status; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 64f8bee31ced..92af1688bae4 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -188,6 +188,7 @@ void pthread__unblock_sigwinch(void); struct perf_record_opts { pid_t target_pid; pid_t target_tid; + uid_t uid; bool call_graph; bool group; bool inherit_stat; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 6893eec693ab..adc72f09914d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -166,6 +166,17 @@ out: return cpus; } +size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) +{ + int i; + size_t printed = fprintf(fp, "%d cpu%s: ", + map->nr, map->nr > 1 ? "s" : ""); + for (i = 0; i < map->nr; ++i) + printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]); + + return printed + fprintf(fp, "\n"); +} + struct cpu_map *cpu_map__dummy_new(void) { struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 072c0a374794..c41518573c6a 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -1,6 +1,8 @@ #ifndef __PERF_CPUMAP_H #define __PERF_CPUMAP_H +#include <stdio.h> + struct cpu_map { int nr; int map[]; @@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); void cpu_map__delete(struct cpu_map *map); +size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); + #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3f16e08a5c8d..a6d50e376257 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -594,14 +594,14 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, } int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t target_tid, const char *cpu_list) + pid_t target_tid, uid_t uid, const char *cpu_list) { - evlist->threads = thread_map__new(target_pid, target_tid); + evlist->threads = thread_map__new(target_pid, target_tid, uid); if (evlist->threads == NULL) return -1; - if (cpu_list == NULL && target_tid != -1) + if (uid != UINT_MAX || (cpu_list == NULL && target_tid != -1)) evlist->cpus = cpu_map__dummy_new(); else evlist->cpus = cpu_map__new(cpu_list); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 8922aeed0467..9c516607ce20 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -107,7 +107,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, } int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid, - pid_t target_tid, const char *cpu_list); + pid_t tid, uid_t uid, const char *cpu_list); void perf_evlist__delete_maps(struct perf_evlist *evlist); int perf_evlist__set_filters(struct perf_evlist *evlist); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index f55f0a8d1f81..0d486135d10f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -55,6 +55,7 @@ struct hists { u64 nr_entries; const struct thread *thread_filter; const struct dso *dso_filter; + const char *uid_filter_str; pthread_mutex_t lock; struct events_stats stats; u64 event_stream; diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h index bb4198e7837a..afe38199e922 100644 --- a/tools/perf/util/include/asm/dwarf2.h +++ b/tools/perf/util/include/asm/dwarf2.h @@ -2,10 +2,12 @@ #ifndef PERF_DWARF2_H #define PERF_DWARF2_H -/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */ +/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */ #define CFI_STARTPROC #define CFI_ENDPROC +#define CFI_REMEMBER_STATE +#define CFI_RESTORE_STATE #endif /* PERF_DWARF2_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 29cb65459811..b9bbdd236357 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1729,7 +1729,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, } ret = 0; - printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":"); + printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; if (pev->event) @@ -1784,7 +1784,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, if (ret >= 0) { /* Show how to use the event. */ - printf("\nYou can now use it on all perf tools, such as:\n\n"); + printf("\nYou can now use it in all perf tools, such as:\n\n"); printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } @@ -1959,7 +1959,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent) goto error; } - printf("Remove event: %s\n", ent->s); + printf("Removed event: %s\n", ent->s); return 0; error: pr_warning("Failed to delete event: %s\n", strerror(-ret)); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 9dd47a4f2596..e03b58a48424 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -425,14 +425,14 @@ struct pyrf_thread_map { static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, PyObject *args, PyObject *kwargs) { - static char *kwlist[] = { "pid", "tid", NULL }; - int pid = -1, tid = -1; + static char *kwlist[] = { "pid", "tid", "uid", NULL }; + int pid = -1, tid = -1, uid = UINT_MAX; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", - kwlist, &pid, &tid)) + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii", + kwlist, &pid, &tid, &uid)) return -1; - pthreads->threads = thread_map__new(pid, tid); + pthreads->threads = thread_map__new(pid, tid, uid); if (pthreads->threads == NULL) return -1; return 0; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index a5df131b77c3..3d4b6c5931b9 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -1,6 +1,11 @@ #include <dirent.h> +#include <limits.h> +#include <stdbool.h> #include <stdlib.h> #include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> #include "thread_map.h" /* Skip "." and ".." directories */ @@ -23,7 +28,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) sprintf(name, "/proc/%d/task", pid); items = scandir(name, &namelist, filter, NULL); if (items <= 0) - return NULL; + return NULL; threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); if (threads != NULL) { @@ -51,10 +56,99 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new(pid_t pid, pid_t tid) +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + DIR *proc; + int max_threads = 32, items, i; + char path[256]; + struct dirent dirent, *next, **namelist = NULL; + struct thread_map *threads = malloc(sizeof(*threads) + + max_threads * sizeof(pid_t)); + if (threads == NULL) + goto out; + + proc = opendir("/proc"); + if (proc == NULL) + goto out_free_threads; + + threads->nr = 0; + + while (!readdir_r(proc, &dirent, &next) && next) { + char *end; + bool grow = false; + struct stat st; + pid_t pid = strtol(dirent.d_name, &end, 10); + + if (*end) /* only interested in proper numerical dirents */ + continue; + + snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); + + if (stat(path, &st) != 0) + continue; + + if (st.st_uid != uid) + continue; + + snprintf(path, sizeof(path), "/proc/%d/task", pid); + items = scandir(path, &namelist, filter, NULL); + if (items <= 0) + goto out_free_closedir; + + while (threads->nr + items >= max_threads) { + max_threads *= 2; + grow = true; + } + + if (grow) { + struct thread_map *tmp; + + tmp = realloc(threads, (sizeof(*threads) + + max_threads * sizeof(pid_t))); + if (tmp == NULL) + goto out_free_namelist; + + threads = tmp; + } + + for (i = 0; i < items; i++) + threads->map[threads->nr + i] = atoi(namelist[i]->d_name); + + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + + threads->nr += items; + } + +out_closedir: + closedir(proc); +out: + return threads; + +out_free_threads: + free(threads); + return NULL; + +out_free_namelist: + for (i = 0; i < items; i++) + free(namelist[i]); + free(namelist); + +out_free_closedir: + free(threads); + threads = NULL; + goto out_closedir; +} + +struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) return thread_map__new_by_pid(pid); + + if (tid == -1 && uid != UINT_MAX) + return thread_map__new_by_uid(uid); + return thread_map__new_by_tid(tid); } @@ -62,3 +156,14 @@ void thread_map__delete(struct thread_map *threads) { free(threads); } + +size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) +{ + int i; + size_t printed = fprintf(fp, "%d thread%s: ", + threads->nr, threads->nr > 1 ? "s" : ""); + for (i = 0; i < threads->nr; ++i) + printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); + + return printed + fprintf(fp, "\n"); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3cb907311409..c75ddbaba005 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -2,6 +2,7 @@ #define __PERF_THREAD_MAP_H #include <sys/types.h> +#include <stdio.h> struct thread_map { int nr; @@ -10,6 +11,10 @@ struct thread_map { struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); -struct thread_map *thread_map__new(pid_t pid, pid_t tid); +struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); void thread_map__delete(struct thread_map *threads); + +size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); + #endif /* __PERF_THREAD_MAP_H */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 500471dffa4f..e4370ca27193 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -75,6 +75,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) else if (top->target_tid != -1) ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d", top->target_tid); + else if (top->uid_str != NULL) + ret += SNPRINTF(bf + ret, size - ret, " (uid: %s", + top->uid_str); else ret += SNPRINTF(bf + ret, size - ret, " (all"); diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index a248f3c2c60d..def3e53e0fe0 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -24,6 +24,7 @@ struct perf_top { int print_entries, count_filter, delay_secs; int freq; pid_t target_pid, target_tid; + uid_t uid; bool hide_kernel_symbols, hide_user_symbols, zero; bool system_wide; bool use_tui, use_stdio; @@ -45,6 +46,7 @@ struct perf_top { int realtime_prio; int sym_pcnt_filter; const char *sym_filter; + const char *uid_str; }; size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index e81aef1f2569..bfba0490c098 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -839,6 +839,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, nr_events = convert_unit(nr_events, &unit); printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); + if (self->uid_filter_str) + printed += snprintf(bf + printed, size - printed, + ", UID: %s", self->uid_filter_str); if (thread) printed += snprintf(bf + printed, size - printed, ", Thread: %s(%d)", diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c index d76d1c0ff98f..d0c013934f30 100644 --- a/tools/perf/util/usage.c +++ b/tools/perf/util/usage.c @@ -7,6 +7,7 @@ * Copyright (C) Linus Torvalds, 2005 */ #include "util.h" +#include "debug.h" static void report(const char *prefix, const char *err, va_list params) { @@ -81,3 +82,41 @@ void warning(const char *warn, ...) warn_routine(warn, params); va_end(params); } + +uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid) +{ + struct passwd pwd, *result; + char buf[1024]; + + if (str == NULL) + return UINT_MAX; + + /* CPU and PID are mutually exclusive */ + if (tid > 0 || pid > 0) { + ui__warning("PID/TID switch overriding UID\n"); + sleep(1); + return UINT_MAX; + } + + getpwnam_r(str, &pwd, buf, sizeof(buf), &result); + + if (result == NULL) { + char *endptr; + int uid = strtol(str, &endptr, 10); + + if (*endptr != '\0') { + ui__error("Invalid user %s\n", str); + return UINT_MAX - 1; + } + + getpwuid_r(uid, &pwd, buf, sizeof(buf), &result); + + if (result == NULL) { + ui__error("Problems obtaining information for user %s\n", + str); + return UINT_MAX - 1; + } + } + + return result->pw_uid; +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ecf9898169c8..232d17ef3e60 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -245,6 +245,8 @@ struct perf_event_attr; void event_attr_init(struct perf_event_attr *attr); +uid_t parse_target_uid(const char *str, pid_t tid, pid_t pid); + #define _STR(x) #x #define STR(x) _STR(x) |