diff options
Diffstat (limited to 'kernel/trace')
-rw-r--r-- | kernel/trace/Kconfig | 15 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 4 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 169 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 134 | ||||
-rw-r--r-- | kernel/trace/rv/rv.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 39 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 8 | ||||
-rw-r--r-- | kernel/trace/trace_events_hist.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_export.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_osnoise.c | 5 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_preemptirq.c | 61 |
14 files changed, 294 insertions, 161 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 197545241ab8..caf32389faf3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -42,6 +42,9 @@ config HAVE_DYNAMIC_FTRACE_WITH_REGS config HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS bool +config HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS + bool + config HAVE_DYNAMIC_FTRACE_WITH_ARGS bool help @@ -257,6 +260,10 @@ config DYNAMIC_FTRACE_WITH_DIRECT_CALLS depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +config DYNAMIC_FTRACE_WITH_CALL_OPS + def_bool y + depends on HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS + config DYNAMIC_FTRACE_WITH_ARGS def_bool y depends on DYNAMIC_FTRACE @@ -933,8 +940,8 @@ config RING_BUFFER_RECORD_RECURSION default y help The ring buffer has its own internal recursion. Although when - recursion happens it wont cause harm because of the protection, - but it does cause an unwanted overhead. Enabling this option will + recursion happens it won't cause harm because of the protection, + but it does cause unwanted overhead. Enabling this option will place where recursion was detected into the ftrace "recursed_functions" file. @@ -1017,8 +1024,8 @@ config RING_BUFFER_STARTUP_TEST The test runs for 10 seconds. This will slow your boot time by at least 10 more seconds. - At the end of the test, statics and more checks are done. - It will output the stats of each per cpu buffer. What + At the end of the test, statistics and more checks are done. + It will output the stats of each per cpu buffer: What was written, the sizes, what was read, what was lost, and other similar details. diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 918a7d12df8f..5743be559415 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -320,8 +320,8 @@ static void blk_trace_free(struct request_queue *q, struct blk_trace *bt) * under 'q->debugfs_dir', thus lookup and remove them. */ if (!bt->dir) { - debugfs_remove(debugfs_lookup("dropped", q->debugfs_dir)); - debugfs_remove(debugfs_lookup("msg", q->debugfs_dir)); + debugfs_lookup_and_remove("dropped", q->debugfs_dir); + debugfs_lookup_and_remove("msg", q->debugfs_dir); } else { debugfs_remove(bt->dir); } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 3bbd3f0c810c..e8da032bb6fc 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -369,8 +369,6 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) return &bpf_probe_write_user_proto; } -static DEFINE_RAW_SPINLOCK(trace_printk_lock); - #define MAX_TRACE_PRINTK_VARARGS 3 #define BPF_TRACE_PRINTK_SIZE 1024 @@ -378,23 +376,22 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, u64, arg2, u64, arg3) { u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 }; - u32 *bin_args; - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret; - ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args, - MAX_TRACE_PRINTK_VARARGS); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, + MAX_TRACE_PRINTK_VARARGS, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -427,30 +424,29 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data, +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, args, u32, data_len) { - static char buf[BPF_TRACE_PRINTK_SIZE]; - unsigned long flags; + struct bpf_bprintf_data data = { + .get_bin_args = true, + .get_buf = true, + }; int ret, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + ret = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (ret < 0) return ret; - raw_spin_lock_irqsave(&trace_printk_lock, flags); - ret = bstr_printf(buf, sizeof(buf), fmt, bin_args); + ret = bstr_printf(data.buf, MAX_BPRINTF_BUF, fmt, data.bin_args); - trace_bpf_trace_printk(buf); - raw_spin_unlock_irqrestore(&trace_printk_lock, flags); + trace_bpf_trace_printk(data.buf); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return ret; } @@ -472,23 +468,25 @@ const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void) } BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size, - const void *, data, u32, data_len) + const void *, args, u32, data_len) { + struct bpf_bprintf_data data = { + .get_bin_args = true, + }; int err, num_args; - u32 *bin_args; if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 || - (data_len && !data)) + (data_len && !args)) return -EINVAL; num_args = data_len / 8; - err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args); + err = bpf_bprintf_prepare(fmt, fmt_size, args, num_args, &data); if (err < 0) return err; - seq_bprintf(m, fmt, bin_args); + seq_bprintf(m, fmt, data.bin_args); - bpf_bprintf_cleanup(); + bpf_bprintf_cleanup(&data); return seq_has_overflowed(m) ? -EOVERFLOW : 0; } @@ -687,8 +685,7 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, } perf_sample_data_init(sd, 0, 0); - sd->raw = &raw; - sd->sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(sd, &raw); err = __bpf_perf_event_output(regs, map, flags, sd); @@ -746,8 +743,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, perf_fetch_caller_regs(regs); perf_sample_data_init(sd, 0, 0); - sd->raw = &raw; - sd->sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(sd, &raw); ret = __bpf_perf_event_output(regs, map, flags, sd); out: @@ -833,6 +829,7 @@ static void do_bpf_send_signal(struct irq_work *entry) work = container_of(entry, struct send_signal_irq_work, irq_work); group_send_sig_info(work->sig, SEND_SIG_PRIV, work->task, work->type); + put_task_struct(work->task); } static int bpf_send_signal_common(u32 sig, enum pid_type type) @@ -848,6 +845,9 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) return -EPERM; if (unlikely(!nmi_uaccess_okay())) return -EPERM; + /* Task should not be pid=1 to avoid kernel panic. */ + if (unlikely(is_global_init(current))) + return -EPERM; if (irqs_disabled()) { /* Do an early check on signal validity. Otherwise, @@ -864,7 +864,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) * to the irq_work. The current task may change when queued * irq works get executed. */ - work->task = current; + work->task = get_task_struct(current); work->sig = sig; work->type = type; irq_work_queue(&work->irq_work); @@ -1235,7 +1235,7 @@ __diag_ignore_all("-Wmissing-prototypes", * Return: a bpf_key pointer with a valid key pointer if the key is found, a * NULL pointer otherwise. */ -struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) +__bpf_kfunc struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) { key_ref_t key_ref; struct bpf_key *bkey; @@ -1284,7 +1284,7 @@ struct bpf_key *bpf_lookup_user_key(u32 serial, u64 flags) * Return: a bpf_key pointer with an invalid key pointer set from the * pre-determined ID on success, a NULL pointer otherwise */ -struct bpf_key *bpf_lookup_system_key(u64 id) +__bpf_kfunc struct bpf_key *bpf_lookup_system_key(u64 id) { struct bpf_key *bkey; @@ -1308,7 +1308,7 @@ struct bpf_key *bpf_lookup_system_key(u64 id) * Decrement the reference count of the key inside *bkey*, if the pointer * is valid, and free *bkey*. */ -void bpf_key_put(struct bpf_key *bkey) +__bpf_kfunc void bpf_key_put(struct bpf_key *bkey) { if (bkey->has_ref) key_put(bkey->key); @@ -1328,7 +1328,7 @@ void bpf_key_put(struct bpf_key *bkey) * * Return: 0 on success, a negative value on error. */ -int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, +__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, struct bpf_dynptr_kern *sig_ptr, struct bpf_key *trusted_keyring) { @@ -2684,69 +2684,77 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv) } } -struct module_addr_args { - unsigned long *addrs; - u32 addrs_cnt; +struct modules_array { struct module **mods; int mods_cnt; int mods_cap; }; -static int module_callback(void *data, const char *name, - struct module *mod, unsigned long addr) +static int add_module(struct modules_array *arr, struct module *mod) { - struct module_addr_args *args = data; struct module **mods; - /* We iterate all modules symbols and for each we: - * - search for it in provided addresses array - * - if found we check if we already have the module pointer stored - * (we iterate modules sequentially, so we can check just the last - * module pointer) - * - take module reference and store it - */ - if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr), - bpf_kprobe_multi_addrs_cmp)) - return 0; - - if (args->mods && args->mods[args->mods_cnt - 1] == mod) - return 0; - - if (args->mods_cnt == args->mods_cap) { - args->mods_cap = max(16, args->mods_cap * 3 / 2); - mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL); + if (arr->mods_cnt == arr->mods_cap) { + arr->mods_cap = max(16, arr->mods_cap * 3 / 2); + mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL); if (!mods) return -ENOMEM; - args->mods = mods; + arr->mods = mods; } - if (!try_module_get(mod)) - return -EINVAL; - - args->mods[args->mods_cnt] = mod; - args->mods_cnt++; + arr->mods[arr->mods_cnt] = mod; + arr->mods_cnt++; return 0; } +static bool has_module(struct modules_array *arr, struct module *mod) +{ + int i; + + for (i = arr->mods_cnt - 1; i >= 0; i--) { + if (arr->mods[i] == mod) + return true; + } + return false; +} + static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt) { - struct module_addr_args args = { - .addrs = addrs, - .addrs_cnt = addrs_cnt, - }; - int err; + struct modules_array arr = {}; + u32 i, err = 0; + + for (i = 0; i < addrs_cnt; i++) { + struct module *mod; + + preempt_disable(); + mod = __module_address(addrs[i]); + /* Either no module or we it's already stored */ + if (!mod || has_module(&arr, mod)) { + preempt_enable(); + continue; + } + if (!try_module_get(mod)) + err = -EINVAL; + preempt_enable(); + if (err) + break; + err = add_module(&arr, mod); + if (err) { + module_put(mod); + break; + } + } /* We return either err < 0 in case of error, ... */ - err = module_kallsyms_on_each_symbol(module_callback, &args); if (err) { - kprobe_multi_put_modules(args.mods, args.mods_cnt); - kfree(args.mods); + kprobe_multi_put_modules(arr.mods, arr.mods_cnt); + kfree(arr.mods); return err; } /* or number of modules found if everything is ok. */ - *mods = args.mods; - return args.mods_cnt; + *mods = arr.mods; + return arr.mods_cnt; } int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -2859,13 +2867,6 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr bpf_kprobe_multi_cookie_cmp, bpf_kprobe_multi_cookie_swap, link); - } else { - /* - * We need to sort addrs array even if there are no cookies - * provided, to allow bsearch in get_modules_for_addrs. - */ - sort(addrs, cnt, sizeof(*addrs), - bpf_kprobe_multi_addrs_cmp, NULL); } err = get_modules_for_addrs(&link->mods, addrs, cnt); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 442438b93fe9..29baa97d0d53 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -125,6 +125,33 @@ struct ftrace_ops global_ops; void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +/* + * Stub used to invoke the list ops without requiring a separate trampoline. + */ +const struct ftrace_ops ftrace_list_ops = { + .func = ftrace_ops_list_func, + .flags = FTRACE_OPS_FL_STUB, +}; + +static void ftrace_ops_nop_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + /* do nothing */ +} + +/* + * Stub used when a call site is disabled. May be called transiently by threads + * which have made it into ftrace_caller but haven't yet recovered the ops at + * the point the call site is disabled. + */ +const struct ftrace_ops ftrace_nop_ops = { + .func = ftrace_ops_nop_func, + .flags = FTRACE_OPS_FL_STUB, +}; +#endif + static inline void ftrace_ops_init(struct ftrace_ops *ops) { #ifdef CONFIG_DYNAMIC_FTRACE @@ -1248,12 +1275,17 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } +/** + * ftrace_free_filter - remove all filters for an ftrace_ops + * @ops - the ops to remove the filters from + */ void ftrace_free_filter(struct ftrace_ops *ops) { ftrace_ops_init(ops); free_ftrace_hash(ops->func_hash->filter_hash); free_ftrace_hash(ops->func_hash->notrace_hash); } +EXPORT_SYMBOL_GPL(ftrace_free_filter); static struct ftrace_hash *alloc_ftrace_hash(int size_bits) { @@ -1814,6 +1846,18 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * if rec count is zero. */ } + + /* + * If the rec has a single associated ops, and ops->func can be + * called directly, allow the call site to call via the ops. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) && + ftrace_rec_count(rec) == 1 && + ftrace_ops_get_func(ops) == ops->func) + rec->flags |= FTRACE_FL_CALL_OPS; + else + rec->flags &= ~FTRACE_FL_CALL_OPS; + count++; /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */ @@ -2108,8 +2152,9 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) struct ftrace_ops *ops = NULL; pr_info("ftrace record flags: %lx\n", rec->flags); - pr_cont(" (%ld)%s", ftrace_rec_count(rec), - rec->flags & FTRACE_FL_REGS ? " R" : " "); + pr_cont(" (%ld)%s%s", ftrace_rec_count(rec), + rec->flags & FTRACE_FL_REGS ? " R" : " ", + rec->flags & FTRACE_FL_CALL_OPS ? " O" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { @@ -2177,6 +2222,7 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) * want the direct enabled (it will be done via the * direct helper). But if DIRECT_EN is set, and * the count is not one, we need to clear it. + * */ if (ftrace_rec_count(rec) == 1) { if (!(rec->flags & FTRACE_FL_DIRECT) != @@ -2185,6 +2231,19 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) } else if (rec->flags & FTRACE_FL_DIRECT_EN) { flag |= FTRACE_FL_DIRECT; } + + /* + * Ops calls are special, as count matters. + * As with direct calls, they must only be enabled when count + * is one, otherwise they'll be handled via the list ops. + */ + if (ftrace_rec_count(rec) == 1) { + if (!(rec->flags & FTRACE_FL_CALL_OPS) != + !(rec->flags & FTRACE_FL_CALL_OPS_EN)) + flag |= FTRACE_FL_CALL_OPS; + } else if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + flag |= FTRACE_FL_CALL_OPS; + } } /* If the state of this record hasn't changed, then do nothing */ @@ -2229,6 +2288,21 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) rec->flags &= ~FTRACE_FL_DIRECT_EN; } } + + if (flag & FTRACE_FL_CALL_OPS) { + if (ftrace_rec_count(rec) == 1) { + if (rec->flags & FTRACE_FL_CALL_OPS) + rec->flags |= FTRACE_FL_CALL_OPS_EN; + else + rec->flags &= ~FTRACE_FL_CALL_OPS_EN; + } else { + /* + * Can only call directly if there's + * only one set of associated ops. + */ + rec->flags &= ~FTRACE_FL_CALL_OPS_EN; + } + } } /* @@ -2258,7 +2332,8 @@ static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update) * and REGS states. The _EN flags must be disabled though. */ rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN | - FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN); + FTRACE_FL_REGS_EN | FTRACE_FL_DIRECT_EN | + FTRACE_FL_CALL_OPS_EN); } ftrace_bug_type = FTRACE_BUG_NOP; @@ -2431,6 +2506,25 @@ ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) return NULL; } +struct ftrace_ops * +ftrace_find_unique_ops(struct dyn_ftrace *rec) +{ + struct ftrace_ops *op, *found = NULL; + unsigned long ip = rec->ip; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + + if (hash_contains_ip(ip, op->func_hash)) { + if (found) + return NULL; + found = op; + } + + } while_for_each_ftrace_op(op); + + return found; +} + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS /* Protected by rcu_tasks for reading, and direct_mutex for writing */ static struct ftrace_hash *direct_functions = EMPTY_HASH; @@ -3780,11 +3874,12 @@ static int t_show(struct seq_file *m, void *v) if (iter->flags & FTRACE_ITER_ENABLED) { struct ftrace_ops *ops; - seq_printf(m, " (%ld)%s%s%s", + seq_printf(m, " (%ld)%s%s%s%s", ftrace_rec_count(rec), rec->flags & FTRACE_FL_REGS ? " R" : " ", rec->flags & FTRACE_FL_IPMODIFY ? " I" : " ", - rec->flags & FTRACE_FL_DIRECT ? " D" : " "); + rec->flags & FTRACE_FL_DIRECT ? " D" : " ", + rec->flags & FTRACE_FL_CALL_OPS ? " O" : " "); if (rec->flags & FTRACE_FL_TRAMP_EN) { ops = ftrace_find_tramp_ops_any(rec); if (ops) { @@ -3800,6 +3895,15 @@ static int t_show(struct seq_file *m, void *v) } else { add_trampoline_func(m, NULL, rec); } + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + if (ops) { + seq_printf(m, "\tops: %pS (%pS)", + ops, ops->func); + } else { + seq_puts(m, "\tops: ERROR!"); + } + } if (rec->flags & FTRACE_FL_DIRECT) { unsigned long direct; @@ -5839,6 +5943,10 @@ EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi); * * Filters denote which functions should be enabled when tracing is enabled * If @ip is NULL, it fails to update filter. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset) @@ -5858,7 +5966,11 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); * * Filters denote which functions should be enabled when tracing is enabled * If @ips array or any ip specified within is NULL , it fails to update filter. - */ + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). +*/ int ftrace_set_filter_ips(struct ftrace_ops *ops, unsigned long *ips, unsigned int cnt, int remove, int reset) { @@ -5900,6 +6012,10 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, * * Filters denote which functions should be enabled when tracing is enabled. * If @buf is NULL and reset is set, all functions will be enabled for tracing. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) @@ -5919,6 +6035,10 @@ EXPORT_SYMBOL_GPL(ftrace_set_filter); * Notrace Filters denote which functions should not be enabled when tracing * is enabled. If @buf is NULL and reset is set, all functions will be enabled * for tracing. + * + * This can allocate memory which must be freed before @ops can be freed, + * either by removing each filtered addr or by using + * ftrace_free_filter(@ops). */ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset) @@ -8324,7 +8444,7 @@ int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *a found_all = kallsyms_on_each_symbol(kallsyms_callback, &args); if (found_all) return 0; - found_all = module_kallsyms_on_each_symbol(kallsyms_callback, &args); + found_all = module_kallsyms_on_each_symbol(NULL, kallsyms_callback, &args); return found_all ? 0 : -ESRCH; } diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 6c97cc2d754a..7e9061828c24 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -516,7 +516,7 @@ static ssize_t enabled_monitors_write(struct file *filp, const char __user *user struct rv_monitor_def *mdef; int retval = -EINVAL; bool enable = true; - char *ptr = buff; + char *ptr; int len; if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a555a861b978..54a163ae4815 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3128,6 +3128,9 @@ void __trace_stack(struct trace_array *tr, unsigned int trace_ctx, return; } + if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY))) + return; + /* * When an NMI triggers, RCU is enabled via ct_nmi_enter(), * but if the above rcu_is_watching() failed, then the NMI @@ -9148,9 +9151,6 @@ buffer_percent_write(struct file *filp, const char __user *ubuf, if (val > 100) return -EINVAL; - if (!val) - val = 1; - tr->buffer_percent = val; (*ppos)++; @@ -10295,6 +10295,8 @@ void __init early_trace_init(void) static_key_enable(&tracepoint_printk_key.key); } tracer_alloc_buffers(); + + init_events(); } void __init trace_init(void) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e46a49269be2..085a31b978a5 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1282,6 +1282,7 @@ struct ftrace_event_field { int offset; int size; int is_signed; + int len; }; struct prog_entry; @@ -1490,6 +1491,7 @@ extern void trace_event_enable_cmd_record(bool enable); extern void trace_event_enable_tgid_record(bool enable); extern int event_trace_init(void); +extern int init_events(void); extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr); extern int event_trace_del_tracer(struct trace_array *tr); extern void __trace_early_add_events(struct trace_array *tr); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 33e0b4f8ebe6..6a942fa275c7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -114,7 +114,7 @@ trace_find_event_field(struct trace_event_call *call, char *name) static int __trace_define_field(struct list_head *head, const char *type, const char *name, int offset, int size, - int is_signed, int filter_type) + int is_signed, int filter_type, int len) { struct ftrace_event_field *field; @@ -133,6 +133,7 @@ static int __trace_define_field(struct list_head *head, const char *type, field->offset = offset; field->size = size; field->is_signed = is_signed; + field->len = len; list_add(&field->link, head); @@ -150,14 +151,28 @@ int trace_define_field(struct trace_event_call *call, const char *type, head = trace_get_fields(call); return __trace_define_field(head, type, name, offset, size, - is_signed, filter_type); + is_signed, filter_type, 0); } EXPORT_SYMBOL_GPL(trace_define_field); +static int trace_define_field_ext(struct trace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed, + int filter_type, int len) +{ + struct list_head *head; + + if (WARN_ON(!call->class)) + return 0; + + head = trace_get_fields(call); + return __trace_define_field(head, type, name, offset, size, + is_signed, filter_type, len); +} + #define __generic_field(type, item, filter_type) \ ret = __trace_define_field(&ftrace_generic_fields, #type, \ #item, 0, 0, is_signed_type(type), \ - filter_type); \ + filter_type, 0); \ if (ret) \ return ret; @@ -166,7 +181,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ - is_signed_type(type), FILTER_OTHER); \ + is_signed_type(type), FILTER_OTHER, 0); \ if (ret) \ return ret; @@ -1588,12 +1603,17 @@ static int f_show(struct seq_file *m, void *v) seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", field->type, field->name, field->offset, field->size, !!field->is_signed); - else - seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + else if (field->len) + seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n", (int)(array_descriptor - field->type), field->type, field->name, - array_descriptor, field->offset, + field->len, field->offset, field->size, !!field->is_signed); + else + seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + field->offset, field->size, !!field->is_signed); return 0; } @@ -2379,9 +2399,10 @@ event_define_fields(struct trace_event_call *call) } offset = ALIGN(offset, field->align); - ret = trace_define_field(call, field->type, field->name, + ret = trace_define_field_ext(call, field->type, field->name, offset, field->size, - field->is_signed, field->filter_type); + field->is_signed, field->filter_type, + field->len); if (WARN_ON_ONCE(ret)) { pr_err("error code is %d\n", ret); break; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 96acc2b71ac7..e095c3b3a50d 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -128,7 +128,7 @@ static bool is_not(const char *str) } /** - * prog_entry - a singe entry in the filter program + * struct prog_entry - a singe entry in the filter program * @target: Index to jump to on a branch (actually one minus the index) * @when_to_branch: The value of the result of the predicate to do a branch * @pred: The predicate to execute. @@ -140,16 +140,16 @@ struct prog_entry { }; /** - * update_preds- assign a program entry a label target + * update_preds - assign a program entry a label target * @prog: The program array * @N: The index of the current entry in @prog - * @when_to_branch: What to assign a program entry for its branch condition + * @invert: What to assign a program entry for its branch condition * * The program entry at @N has a target that points to the index of a program * entry that can have its target and when_to_branch fields updated. * Update the current program entry denoted by index @N target field to be * that of the updated entry. This will denote the entry to update if - * we are processing an "||" after an "&&" + * we are processing an "||" after an "&&". */ static void update_preds(struct prog_entry *prog, int N, int invert) { diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index fcaf226b7744..5edbf6b1da3f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1988,6 +1988,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, hist_field->fn_num = flags & HIST_FIELD_FL_LOG2 ? HIST_FIELD_FN_LOG2 : HIST_FIELD_FN_BUCKET; hist_field->operands[0] = create_hist_field(hist_data, field, fl, NULL); + if (!hist_field->operands[0]) + goto free; hist_field->size = hist_field->operands[0]->size; hist_field->type = kstrdup_const(hist_field->operands[0]->type, GFP_KERNEL); if (!hist_field->type) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d960f6b11b5e..58f3946081e2 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -111,7 +111,8 @@ static void __always_unused ____ftrace_check_##name(void) \ #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ .size = sizeof(_type[_len]), .align = __alignof__(_type), \ - is_signed_type(_type), .filter_type = FILTER_OTHER }, + is_signed_type(_type), .filter_type = FILTER_OTHER, \ + .len = _len }, #undef __array_desc #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 94c1b5eb1dc0..210e1f168392 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -147,9 +147,8 @@ static void osnoise_unregister_instance(struct trace_array *tr) * register/unregister serialization is provided by trace's * trace_types_lock. */ - lockdep_assert_held(&trace_types_lock); - - list_for_each_entry_rcu(inst, &osnoise_instances, list) { + list_for_each_entry_rcu(inst, &osnoise_instances, list, + lockdep_is_held(&trace_types_lock)) { if (inst->tr == tr) { list_del_rcu(&inst->list); found = 1; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 57a13b61f186..bd475a00f96d 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -1535,7 +1535,7 @@ static struct trace_event *events[] __initdata = { NULL }; -__init static int init_events(void) +__init int init_events(void) { struct trace_event *event; int i, ret; @@ -1548,4 +1548,3 @@ __init static int init_events(void) return 0; } -early_initcall(init_events); diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index 1e130da1b742..e37446f7916e 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -15,6 +15,20 @@ #define CREATE_TRACE_POINTS #include <trace/events/preemptirq.h> +/* + * Use regular trace points on architectures that implement noinstr + * tooling: these calls will only happen with RCU enabled, which can + * use a regular tracepoint. + * + * On older architectures, use the rcuidle tracing methods (which + * aren't NMI-safe - so exclude NMI contexts): + */ +#ifdef CONFIG_ARCH_WANTS_NO_INSTR +#define trace(point) trace_##point +#else +#define trace(point) if (!in_nmi()) trace_##point##_rcuidle +#endif + #ifdef CONFIG_TRACE_IRQFLAGS /* Per-cpu variable to prevent redundant calls when IRQs already off */ static DEFINE_PER_CPU(int, tracing_irq_cpu); @@ -28,8 +42,7 @@ static DEFINE_PER_CPU(int, tracing_irq_cpu); void trace_hardirqs_on_prepare(void) { if (this_cpu_read(tracing_irq_cpu)) { - if (!in_nmi()) - trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_enable)(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } @@ -40,8 +53,7 @@ NOKPROBE_SYMBOL(trace_hardirqs_on_prepare); void trace_hardirqs_on(void) { if (this_cpu_read(tracing_irq_cpu)) { - if (!in_nmi()) - trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_enable)(CALLER_ADDR0, CALLER_ADDR1); tracer_hardirqs_on(CALLER_ADDR0, CALLER_ADDR1); this_cpu_write(tracing_irq_cpu, 0); } @@ -63,8 +75,7 @@ void trace_hardirqs_off_finish(void) if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); - if (!in_nmi()) - trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_disable)(CALLER_ADDR0, CALLER_ADDR1); } } @@ -78,56 +89,24 @@ void trace_hardirqs_off(void) if (!this_cpu_read(tracing_irq_cpu)) { this_cpu_write(tracing_irq_cpu, 1); tracer_hardirqs_off(CALLER_ADDR0, CALLER_ADDR1); - if (!in_nmi()) - trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1); + trace(irq_disable)(CALLER_ADDR0, CALLER_ADDR1); } } EXPORT_SYMBOL(trace_hardirqs_off); NOKPROBE_SYMBOL(trace_hardirqs_off); - -__visible void trace_hardirqs_on_caller(unsigned long caller_addr) -{ - if (this_cpu_read(tracing_irq_cpu)) { - if (!in_nmi()) - trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr); - tracer_hardirqs_on(CALLER_ADDR0, caller_addr); - this_cpu_write(tracing_irq_cpu, 0); - } - - lockdep_hardirqs_on_prepare(); - lockdep_hardirqs_on(caller_addr); -} -EXPORT_SYMBOL(trace_hardirqs_on_caller); -NOKPROBE_SYMBOL(trace_hardirqs_on_caller); - -__visible void trace_hardirqs_off_caller(unsigned long caller_addr) -{ - lockdep_hardirqs_off(caller_addr); - - if (!this_cpu_read(tracing_irq_cpu)) { - this_cpu_write(tracing_irq_cpu, 1); - tracer_hardirqs_off(CALLER_ADDR0, caller_addr); - if (!in_nmi()) - trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr); - } -} -EXPORT_SYMBOL(trace_hardirqs_off_caller); -NOKPROBE_SYMBOL(trace_hardirqs_off_caller); #endif /* CONFIG_TRACE_IRQFLAGS */ #ifdef CONFIG_TRACE_PREEMPT_TOGGLE void trace_preempt_on(unsigned long a0, unsigned long a1) { - if (!in_nmi()) - trace_preempt_enable_rcuidle(a0, a1); + trace(preempt_enable)(a0, a1); tracer_preempt_on(a0, a1); } void trace_preempt_off(unsigned long a0, unsigned long a1) { - if (!in_nmi()) - trace_preempt_disable_rcuidle(a0, a1); + trace(preempt_disable)(a0, a1); tracer_preempt_off(a0, a1); } #endif |