diff options
Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r-- | kernel/trace/trace.c | 330 |
1 files changed, 260 insertions, 70 deletions
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d23a09d3eb37..7896d30d90f7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -39,6 +39,7 @@ #include <linux/slab.h> #include <linux/ctype.h> #include <linux/init.h> +#include <linux/panic_notifier.h> #include <linux/poll.h> #include <linux/nmi.h> #include <linux/fs.h> @@ -86,6 +87,7 @@ void __init disable_tracing_selftest(const char *reason) /* Pipe tracepoints to printk */ struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; +static bool tracepoint_printk_stop_on_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); /* For tracers that don't implement custom flags */ @@ -196,12 +198,12 @@ __setup("ftrace=", set_cmdline_ftrace); static int __init set_ftrace_dump_on_oops(char *str) { - if (*str++ != '=' || !*str) { + if (*str++ != '=' || !*str || !strcmp("1", str)) { ftrace_dump_on_oops = DUMP_ALL; return 1; } - if (!strcmp("orig_cpu", str)) { + if (!strcmp("orig_cpu", str) || !strcmp("2", str)) { ftrace_dump_on_oops = DUMP_ORIG; return 1; } @@ -256,6 +258,13 @@ static int __init set_tracepoint_printk(char *str) } __setup("tp_printk", set_tracepoint_printk); +static int __init set_tracepoint_printk_stop(char *str) +{ + tracepoint_printk_stop_on_boot = true; + return 1; +} +__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); + unsigned long long ns2usecs(u64 nsec) { nsec += 500; @@ -1682,8 +1691,7 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) unsigned long __read_mostly tracing_thresh; static const struct file_operations tracing_max_lat_fops; -#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \ - defined(CONFIG_FSNOTIFY) +#ifdef LATENCY_FS_NOTIFY static struct workqueue_struct *fsnotify_wq; @@ -2184,8 +2192,15 @@ void tracing_reset_all_online_cpus(void) } } +/* + * The tgid_map array maps from pid to tgid; i.e. the value stored at index i + * is the tgid last observed corresponding to pid=i. + */ static int *tgid_map; +/* The maximum valid index into tgid_map. */ +static size_t tgid_map_max; + #define SAVED_CMDLINES_DEFAULT 128 #define NO_CMDLINE_MAP UINT_MAX static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; @@ -2458,24 +2473,41 @@ void trace_find_cmdline(int pid, char comm[]) preempt_enable(); } +static int *trace_find_tgid_ptr(int pid) +{ + /* + * Pairs with the smp_store_release in set_tracer_flag() to ensure that + * if we observe a non-NULL tgid_map then we also observe the correct + * tgid_map_max. + */ + int *map = smp_load_acquire(&tgid_map); + + if (unlikely(!map || pid > tgid_map_max)) + return NULL; + + return &map[pid]; +} + int trace_find_tgid(int pid) { - if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT)) - return 0; + int *ptr = trace_find_tgid_ptr(pid); - return tgid_map[pid]; + return ptr ? *ptr : 0; } static int trace_save_tgid(struct task_struct *tsk) { + int *ptr; + /* treat recording of idle task as a success */ if (!tsk->pid) return 1; - if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT)) + ptr = trace_find_tgid_ptr(tsk->pid); + if (!ptr) return 0; - tgid_map[tsk->pid] = tsk->tgid; + *ptr = tsk->tgid; return 1; } @@ -2571,6 +2603,15 @@ enum print_line_t trace_handle_return(struct trace_seq *s) } EXPORT_SYMBOL_GPL(trace_handle_return); +static unsigned short migration_disable_value(void) +{ +#if defined(CONFIG_SMP) + return current->migration_disabled; +#else + return 0; +#endif +} + unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) { unsigned int trace_flags = irqs_status; @@ -2589,7 +2630,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status) trace_flags |= TRACE_FLAG_NEED_RESCHED; if (test_preempt_need_resched()) trace_flags |= TRACE_FLAG_PREEMPT_RESCHED; - return (trace_flags << 16) | (pc & 0xff); + return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) | + (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; } struct ring_buffer_event * @@ -2729,9 +2771,45 @@ trace_event_buffer_lock_reserve(struct trace_buffer **current_rb, if (!tr->no_filter_buffering_ref && (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && (entry = this_cpu_read(trace_buffered_event))) { - /* Try to use the per cpu buffer first */ + /* + * Filtering is on, so try to use the per cpu buffer first. + * This buffer will simulate a ring_buffer_event, + * where the type_len is zero and the array[0] will + * hold the full length. + * (see include/linux/ring-buffer.h for details on + * how the ring_buffer_event is structured). + * + * Using a temp buffer during filtering and copying it + * on a matched filter is quicker than writing directly + * into the ring buffer and then discarding it when + * it doesn't match. That is because the discard + * requires several atomic operations to get right. + * Copying on match and doing nothing on a failed match + * is still quicker than no copy on match, but having + * to discard out of the ring buffer on a failed match. + */ + int max_len = PAGE_SIZE - struct_size(entry, array, 1); + val = this_cpu_inc_return(trace_buffered_event_cnt); - if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) { + + /* + * Preemption is disabled, but interrupts and NMIs + * can still come in now. If that happens after + * the above increment, then it will have to go + * back to the old method of allocating the event + * on the ring buffer, and if the filter fails, it + * will have to call ring_buffer_discard_commit() + * to remove it. + * + * Need to also check the unlikely case that the + * length is bigger than the temp buffer size. + * If that happens, then the reserve is pretty much + * guaranteed to fail, as the ring buffer currently + * only allows events less than a page. But that may + * change in the future, so let the ring buffer reserve + * handle the failure in that case. + */ + if (val == 1 && likely(len <= max_len)) { trace_event_setup(entry, type, trace_ctx); entry->array[0] = len; return entry; @@ -2829,14 +2907,26 @@ int tracepoint_printk_sysctl(struct ctl_table *table, int write, void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) { + enum event_trigger_type tt = ETT_NONE; + struct trace_event_file *file = fbuffer->trace_file; + + if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, + fbuffer->entry, &tt)) + goto discard; + if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); if (static_branch_unlikely(&trace_event_exports_enabled)) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); - event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, - fbuffer->event, fbuffer->entry, - fbuffer->trace_ctx, fbuffer->regs); + + trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, + fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); + +discard: + if (tt) + event_triggers_post_call(file, tt); + } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); @@ -3617,11 +3707,11 @@ static bool trace_safe_str(struct trace_iterator *iter, const char *str) return false; event = container_of(trace_event, struct trace_event_call, event); - if (!event->mod) + if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module) return false; /* Would rather have rodata, but this will suffice */ - if (within_module_core(addr, event->mod)) + if (within_module_core(addr, event->module)) return true; return false; @@ -4109,9 +4199,10 @@ static void print_lat_help_header(struct seq_file *m) "# | / _----=> need-resched \n" "# || / _---=> hardirq/softirq \n" "# ||| / _--=> preempt-depth \n" - "# |||| / delay \n" - "# cmd pid ||||| time | caller \n" - "# \\ / ||||| \\ | / \n"); + "# |||| / _-=> migrate-disable \n" + "# ||||| / delay \n" + "# cmd pid |||||| time | caller \n" + "# \\ / |||||| \\ | / \n"); } static void print_event_info(struct array_buffer *buf, struct seq_file *m) @@ -4149,9 +4240,10 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space); seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space); seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space); - seq_printf(m, "# %.*s||| / delay\n", prec, space); - seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID "); - seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | "); + seq_printf(m, "# %.*s||| / _-=> migrate-disable\n", prec, space); + seq_printf(m, "# %.*s|||| / delay\n", prec, space); + seq_printf(m, "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID "); + seq_printf(m, "# | | %.*s | ||||| | |\n", prec, " | "); } void @@ -5171,6 +5263,8 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { + int *map; + if ((mask == TRACE_ITER_RECORD_TGID) || (mask == TRACE_ITER_RECORD_CMD)) lockdep_assert_held(&event_mutex); @@ -5193,10 +5287,19 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) trace_event_enable_cmd_record(enabled); if (mask == TRACE_ITER_RECORD_TGID) { - if (!tgid_map) - tgid_map = kvcalloc(PID_MAX_DEFAULT + 1, - sizeof(*tgid_map), - GFP_KERNEL); + if (!tgid_map) { + tgid_map_max = pid_max; + map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), + GFP_KERNEL); + + /* + * Pairs with smp_load_acquire() in + * trace_find_tgid_ptr() to ensure that if it observes + * the tgid_map we just allocated then it also observes + * the corresponding tgid_map_max value. + */ + smp_store_release(&tgid_map, map); + } if (!tgid_map) { tr->trace_flags &= ~TRACE_ITER_RECORD_TGID; return -ENOMEM; @@ -5452,6 +5555,7 @@ static const char readme_msg[] = #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/]<event> <field> [<field>]\n" #endif + "\t e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]\n" "\t -:[<group>/]<event>\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n" @@ -5461,7 +5565,7 @@ static const char readme_msg[] = " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n" #endif "\t args: <name>=fetcharg[:type]\n" - "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n" + "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n" #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n" #else @@ -5476,6 +5580,8 @@ static const char readme_msg[] = "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n" "\t [unsigned] char/int/long\n" #endif + "\t efield: For event probes ('e' types), the field is on of the fields\n" + "\t of the <attached-group>/<attached-event>.\n" #endif " events/\t\t- Directory containing all trace event subsystems:\n" " enable\t\t- Write 0/1 to enable/disable tracing of all events\n" @@ -5530,6 +5636,10 @@ static const char readme_msg[] = "\t [:name=histname1]\n" "\t [:<handler>.<action>]\n" "\t [if <filter>]\n\n" + "\t Note, special fields can be used as well:\n" + "\t common_timestamp - to record current timestamp\n" + "\t common_cpu - to record the CPU the event happened on\n" + "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. Keys and values\n" @@ -5559,6 +5669,7 @@ static const char readme_msg[] = "\t .execname display a common_pid as a program name\n" "\t .syscall display a syscall id as a syscall name\n" "\t .log2 display log2 value rather than raw number\n" + "\t .buckets=size display values in groups of size rather than raw number\n" "\t .usecs display a common_timestamp in microseconds\n\n" "\t The 'pause' parameter can be used to pause an existing hist\n" "\t trigger or to start a hist trigger but not log any events\n" @@ -5608,37 +5719,16 @@ static const struct file_operations tracing_readme_fops = { static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos) { - int *ptr = v; - - if (*pos || m->count) - ptr++; - - (*pos)++; + int pid = ++(*pos); - for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) { - if (trace_find_tgid(*ptr)) - return ptr; - } - - return NULL; + return trace_find_tgid_ptr(pid); } static void *saved_tgids_start(struct seq_file *m, loff_t *pos) { - void *v; - loff_t l = 0; - - if (!tgid_map) - return NULL; - - v = &tgid_map[0]; - while (l <= *pos) { - v = saved_tgids_next(m, v, &l); - if (!v) - return NULL; - } + int pid = *pos; - return v; + return trace_find_tgid_ptr(pid); } static void saved_tgids_stop(struct seq_file *m, void *v) @@ -5647,9 +5737,14 @@ static void saved_tgids_stop(struct seq_file *m, void *v) static int saved_tgids_show(struct seq_file *m, void *v) { - int pid = (int *)v - tgid_map; + int *entry = (int *)v; + int pid = entry - tgid_map; + int tgid = *entry; + + if (tgid == 0) + return SEQ_SKIP; - seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid)); + seq_printf(m, "%d %d\n", pid, tgid); return 0; } @@ -6134,7 +6229,7 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id) { - int ret = size; + int ret; mutex_lock(&trace_types_lock); @@ -7528,6 +7623,91 @@ static const struct file_operations snapshot_raw_fops = { #endif /* CONFIG_TRACER_SNAPSHOT */ +/* + * trace_min_max_write - Write a u64 value to a trace_min_max_param struct + * @filp: The active open file structure + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function implements the write interface for a struct trace_min_max_param. + * The filp->private_data must point to a trace_min_max_param structure that + * defines where to write the value, the min and the max acceptable values, + * and a lock to protect the write. + */ +static ssize_t +trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_min_max_param *param = filp->private_data; + u64 val; + int err; + + if (!param) + return -EFAULT; + + err = kstrtoull_from_user(ubuf, cnt, 10, &val); + if (err) + return err; + + if (param->lock) + mutex_lock(param->lock); + + if (param->min && val < *param->min) + err = -EINVAL; + + if (param->max && val > *param->max) + err = -EINVAL; + + if (!err) + *param->val = val; + + if (param->lock) + mutex_unlock(param->lock); + + if (err) + return err; + + return cnt; +} + +/* + * trace_min_max_read - Read a u64 value from a trace_min_max_param struct + * @filp: The active open file structure + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function implements the read interface for a struct trace_min_max_param. + * The filp->private_data must point to a trace_min_max_param struct with valid + * data. + */ +static ssize_t +trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) +{ + struct trace_min_max_param *param = filp->private_data; + char buf[U64_STR_SIZE]; + int len; + u64 val; + + if (!param) + return -EFAULT; + + val = *param->val; + + if (cnt > sizeof(buf)) + cnt = sizeof(buf); + + len = snprintf(buf, sizeof(buf), "%llu\n", val); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); +} + +const struct file_operations trace_min_max_fops = { + .open = tracing_open_generic, + .read = trace_min_max_read, + .write = trace_min_max_write, +}; + #define TRACING_LOG_ERRS_MAX 8 #define TRACING_LOG_LOC_MAX 128 @@ -8983,8 +9163,10 @@ static int trace_array_create_dir(struct trace_array *tr) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { tracefs_remove(tr->dir); + return ret; + } init_tracer_tracefs(tr, tr->dir); __update_tracer_options(tr); @@ -9531,6 +9713,8 @@ static __init int tracer_init_tracefs(void) return 0; } +fs_initcall(tracer_init_tracefs); + static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { @@ -9647,7 +9831,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) tracing_off(); local_irq_save(flags); - printk_nmi_direct_enter(); /* Simulate the iterator */ trace_init_global_iter(&iter); @@ -9729,7 +9912,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled); } atomic_dec(&dump_running); - printk_nmi_direct_exit(); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(ftrace_dump); @@ -9951,7 +10133,7 @@ void __init trace_init(void) trace_event_init(); } -__init static int clear_boot_tracer(void) +__init static void clear_boot_tracer(void) { /* * The default tracer at boot buffer is an init section. @@ -9961,26 +10143,21 @@ __init static int clear_boot_tracer(void) * about to be freed. */ if (!default_bootup_tracer) - return 0; + return; printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", default_bootup_tracer); default_bootup_tracer = NULL; - - return 0; } -fs_initcall(tracer_init_tracefs); -late_initcall_sync(clear_boot_tracer); - #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK -__init static int tracing_set_default_clock(void) +__init static void tracing_set_default_clock(void) { /* sched_clock_stable() is determined in late_initcall */ if (!trace_boot_clock && !sched_clock_stable()) { if (security_locked_down(LOCKDOWN_TRACEFS)) { pr_warn("Can not set tracing clock due to lockdown\n"); - return -EPERM; + return; } printk(KERN_WARNING @@ -9990,8 +10167,21 @@ __init static int tracing_set_default_clock(void) "on the kernel command line\n"); tracing_set_clock(&global_trace, "global"); } +} +#else +static inline void tracing_set_default_clock(void) { } +#endif + +__init static int late_trace_init(void) +{ + if (tracepoint_printk && tracepoint_printk_stop_on_boot) { + static_key_disable(&tracepoint_printk_key.key); + tracepoint_printk = 0; + } + tracing_set_default_clock(); + clear_boot_tracer(); return 0; } -late_initcall_sync(tracing_set_default_clock); -#endif + +late_initcall_sync(late_trace_init); |