From 5f4126327494c189767ca64b222abadb07c55e3d Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:07 -0700 Subject: bpf: change prototype for stack_map_get_build_id_offset This patch didn't incur functionality change. The function prototype got changed so that the same function can be reused later. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/stackmap.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 57eeb1234b67..04f6ec1679f0 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -262,16 +262,11 @@ out: return ret; } -static void stack_map_get_build_id_offset(struct bpf_map *map, - struct stack_map_bucket *bucket, +static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, u64 *ips, u32 trace_nr, bool user) { int i; struct vm_area_struct *vma; - struct bpf_stack_build_id *id_offs; - - bucket->nr = trace_nr; - id_offs = (struct bpf_stack_build_id *)bucket->data; /* * We cannot do up_read() in nmi context, so build_id lookup is @@ -361,8 +356,10 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, pcpu_freelist_pop(&smap->freelist); if (unlikely(!new_bucket)) return -ENOMEM; - stack_map_get_build_id_offset(map, new_bucket, ips, - trace_nr, user); + new_bucket->nr = trace_nr; + stack_map_get_build_id_offset( + (struct bpf_stack_build_id *)new_bucket->data, + ips, trace_nr, user); trace_len = trace_nr * sizeof(struct bpf_stack_build_id); if (hash_matches && bucket->nr == trace_nr && memcmp(bucket->data, new_bucket->data, trace_len) == 0) { -- cgit v1.2.3 From c195651e565ae7f41a68acb7d4aa7390ad215de1 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:08 -0700 Subject: bpf: add bpf_get_stack helper Currently, stackmap and bpf_get_stackid helper are provided for bpf program to get the stack trace. This approach has a limitation though. If two stack traces have the same hash, only one will get stored in the stackmap table, so some stack traces are missing from user perspective. This patch implements a new helper, bpf_get_stack, will send stack traces directly to bpf program. The bpf program is able to see all stack traces, and then can do in-kernel processing or send stack traces to user space through shared map or bpf_perf_event_output. Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 + include/linux/filter.h | 3 ++- include/uapi/linux/bpf.h | 42 ++++++++++++++++++++++++++++-- kernel/bpf/core.c | 5 ++++ kernel/bpf/stackmap.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 19 ++++++++++++++ kernel/trace/bpf_trace.c | 50 +++++++++++++++++++++++++++++++++++- 7 files changed, 183 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 38ebbc61ed99..c553f6f9c6b0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -692,6 +692,7 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; extern const struct bpf_func_proto bpf_get_stackid_proto; +extern const struct bpf_func_proto bpf_get_stack_proto; extern const struct bpf_func_proto bpf_sock_map_update_proto; /* Shared helpers among cBPF and eBPF. */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 4da8b2308174..64899c04c1a6 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -468,7 +468,8 @@ struct bpf_prog { dst_needed:1, /* Do we need dst entry? */ blinded:1, /* Was blinded */ is_func:1, /* program is a bpf function */ - kprobe_override:1; /* Do we override a kprobe? */ + kprobe_override:1, /* Do we override a kprobe? */ + has_callchain_buf:1; /* callchain buffer allocated? */ enum bpf_prog_type type; /* Type of BPF program */ enum bpf_attach_type expected_attach_type; /* For some prog types */ u32 len; /* Number of filter blocks */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da77a9388947..1afb606a18b9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1767,6 +1767,40 @@ union bpf_attr { * **CONFIG_XFRM** configuration option. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *ctx*, which is a pointer + * to the context on which the tracing program is executed. + * To store the stacktrace, the bpf program provides *buf* with + * a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * + * Return + * a non-negative value equal to or less than size on success, or + * a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -1835,7 +1869,8 @@ union bpf_attr { FN(msg_pull_data), \ FN(bind), \ FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), + FN(skb_get_xfrm_state), \ + FN(get_stack), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -1869,11 +1904,14 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ #define BPF_F_TUNINFO_IPV6 (1ULL << 0) -/* BPF_FUNC_get_stackid flags. */ +/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ #define BPF_F_SKIP_FIELD_MASK 0xffULL #define BPF_F_USER_STACK (1ULL << 8) +/* flags used by BPF_FUNC_get_stackid only. */ #define BPF_F_FAST_STACK_CMP (1ULL << 9) #define BPF_F_REUSE_STACKID (1ULL << 10) +/* flags used by BPF_FUNC_get_stack only. */ +#define BPF_F_USER_BUILD_ID (1ULL << 11) /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ba03ec39efb3..9349a5db3cf2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -1722,6 +1723,10 @@ static void bpf_prog_free_deferred(struct work_struct *work) aux = container_of(work, struct bpf_prog_aux, work); if (bpf_prog_is_dev_bound(aux)) bpf_prog_offload_destroy(aux->prog); +#ifdef CONFIG_PERF_EVENTS + if (aux->prog->has_callchain_buf) + put_callchain_buffers(); +#endif for (i = 0; i < aux->func_cnt; i++) bpf_jit_free(aux->func[i]); if (aux->func_cnt) { diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 04f6ec1679f0..3ba102b41512 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -402,6 +402,73 @@ const struct bpf_func_proto bpf_get_stackid_proto = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size, + u64, flags) +{ + u32 init_nr, trace_nr, copy_len, elem_size, num_elem; + bool user_build_id = flags & BPF_F_USER_BUILD_ID; + u32 skip = flags & BPF_F_SKIP_FIELD_MASK; + bool user = flags & BPF_F_USER_STACK; + struct perf_callchain_entry *trace; + bool kernel = !user; + int err = -EINVAL; + u64 *ips; + + if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK | + BPF_F_USER_BUILD_ID))) + goto clear; + if (kernel && user_build_id) + goto clear; + + elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id) + : sizeof(u64); + if (unlikely(size % elem_size)) + goto clear; + + num_elem = size / elem_size; + if (sysctl_perf_event_max_stack < num_elem) + init_nr = 0; + else + init_nr = sysctl_perf_event_max_stack - num_elem; + trace = get_perf_callchain(regs, init_nr, kernel, user, + sysctl_perf_event_max_stack, false, false); + if (unlikely(!trace)) + goto err_fault; + + trace_nr = trace->nr - init_nr; + if (trace_nr < skip) + goto err_fault; + + trace_nr -= skip; + trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem; + copy_len = trace_nr * elem_size; + ips = trace->ip + skip + init_nr; + if (user && user_build_id) + stack_map_get_build_id_offset(buf, ips, trace_nr, user); + else + memcpy(buf, ips, copy_len); + + if (size > copy_len) + memset(buf + copy_len, 0, size - copy_len); + return copy_len; + +err_fault: + err = -EFAULT; +clear: + memset(buf, 0, size); + return err; +} + +const struct bpf_func_proto bpf_get_stack_proto = { + .func = bpf_get_stack, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, +}; + /* Called from eBPF program */ static void *stack_map_lookup_elem(struct bpf_map *map, void *key) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eb1a596aebd3..253f6bdb9117 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "disasm.h" @@ -2450,6 +2451,24 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn if (err) return err; + if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) { + const char *err_str; + +#ifdef CONFIG_PERF_EVENTS + err = get_callchain_buffers(sysctl_perf_event_max_stack); + err_str = "cannot get callchain buffer for func %s#%d\n"; +#else + err = -ENOTSUPP; + err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n"; +#endif + if (err) { + verbose(env, err_str, func_id_name(func_id), func_id); + return err; + } + + env->prog->has_callchain_buf = true; + } + if (changes_data) clear_all_pkt_pointers(env); return 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 56ba0f2a01db..46d866e9937c 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -20,6 +20,7 @@ #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); /** * trace_call_bpf - invoke BPF program @@ -577,6 +578,8 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_perf_event_output_proto; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto; + case BPF_FUNC_get_stack: + return &bpf_get_stack_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; #ifdef CONFIG_BPF_KPROBE_OVERRIDE @@ -664,6 +667,25 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size, + u64, flags) +{ + struct pt_regs *regs = *(struct pt_regs **)tp_buff; + + return bpf_get_stack((unsigned long) regs, (unsigned long) buf, + (unsigned long) size, flags, 0); +} + +static const struct bpf_func_proto bpf_get_stack_proto_tp = { + .func = bpf_get_stack_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -672,6 +694,8 @@ tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_perf_event_output_proto_tp; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_tp; + case BPF_FUNC_get_stack: + return &bpf_get_stack_proto_tp; default: return tracing_func_proto(func_id, prog); } @@ -734,6 +758,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_perf_event_output_proto_tp; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_tp; + case BPF_FUNC_get_stack: + return &bpf_get_stack_proto_tp; case BPF_FUNC_perf_prog_read_value: return &bpf_perf_prog_read_value_proto; default: @@ -744,7 +770,7 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) /* * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp * to avoid potential recursive reuse issue when/if tracepoints are added - * inside bpf_*_event_output and/or bpf_get_stack_id + * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack */ static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, @@ -787,6 +813,26 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { .arg3_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, + void *, buf, u32, size, u64, flags) +{ + struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + + perf_fetch_caller_regs(regs); + return bpf_get_stack((unsigned long) regs, (unsigned long) buf, + (unsigned long) size, flags, 0); +} + +static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { + .func = bpf_get_stack_raw_tp, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -795,6 +841,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_perf_event_output_proto_raw_tp; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_raw_tp; + case BPF_FUNC_get_stack: + return &bpf_get_stack_proto_raw_tp; default: return tracing_func_proto(func_id, prog); } -- cgit v1.2.3 From 849fa50662fbc8b476d652f8a4e6bdda17b37859 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:09 -0700 Subject: bpf/verifier: refine retval R0 state for bpf_get_stack helper The special property of return values for helpers bpf_get_stack and bpf_probe_read_str are captured in verifier. Both helpers return a negative error code or a length, which is equal to or smaller than the buffer size argument. This additional information in the verifier can avoid the condition such as "retval > bufsize" in the bpf program. For example, for the code blow, usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); if (usize < 0 || usize > max_len) return 0; The verifier may have the following errors: 52: (85) call bpf_get_stack#65 R0=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R3_w=inv800 R4_w=inv256 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R9_w=inv800 R10=fp0,call_-1 53: (bf) r8 = r0 54: (bf) r1 = r8 55: (67) r1 <<= 32 56: (bf) r2 = r1 57: (77) r2 >>= 32 58: (25) if r2 > 0x31f goto pc+33 R0=inv(id=0) R1=inv(id=0,smax_value=9223372032559808512, umax_value=18446744069414584320, var_off=(0x0; 0xffffffff00000000)) R2=inv(id=0,umax_value=799,var_off=(0x0; 0x3ff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R8=inv(id=0) R9=inv800 R10=fp0,call_-1 59: (1f) r9 -= r8 60: (c7) r1 s>>= 32 61: (bf) r2 = r7 62: (0f) r2 += r1 math between map_value pointer and register with unbounded min value is not allowed The failure is due to llvm compiler optimization where register "r2", which is a copy of "r1", is tested for condition while later on "r1" is used for map_ptr operation. The verifier is not able to track such inst sequence effectively. Without the "usize > max_len" condition, there is no llvm optimization and the below generated code passed verifier: 52: (85) call bpf_get_stack#65 R0=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R3_w=inv800 R4_w=inv256 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R9_w=inv800 R10=fp0,call_-1 53: (b7) r1 = 0 54: (bf) r8 = r0 55: (67) r8 <<= 32 56: (c7) r8 s>>= 32 57: (6d) if r1 s> r8 goto pc+24 R0=inv(id=0,umax_value=800,var_off=(0x0; 0x3ff)) R1=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R8=inv(id=0,umax_value=800,var_off=(0x0; 0x3ff)) R9=inv800 R10=fp0,call_-1 58: (bf) r2 = r7 59: (0f) r2 += r8 60: (1f) r9 -= r8 61: (bf) r1 = r6 Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 253f6bdb9117..988400e283b7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -165,6 +165,8 @@ struct bpf_call_arg_meta { bool pkt_access; int regno; int access_size; + s64 msize_smax_value; + u64 msize_umax_value; }; static DEFINE_MUTEX(bpf_verifier_lock); @@ -1985,6 +1987,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); + /* remember the mem_size which may be used later + * to refine return values. + */ + meta->msize_smax_value = reg->smax_value; + meta->msize_umax_value = reg->umax_value; + /* The register is SCALAR_VALUE; the access check * happens using its boundaries. */ @@ -2324,6 +2332,23 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) return 0; } +static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, + int func_id, + struct bpf_call_arg_meta *meta) +{ + struct bpf_reg_state *ret_reg = ®s[BPF_REG_0]; + + if (ret_type != RET_INTEGER || + (func_id != BPF_FUNC_get_stack && + func_id != BPF_FUNC_probe_read_str)) + return; + + ret_reg->smax_value = meta->msize_smax_value; + ret_reg->umax_value = meta->msize_umax_value; + __reg_deduce_bounds(ret_reg); + __reg_bound_offset(ret_reg); +} + static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { const struct bpf_func_proto *fn = NULL; @@ -2447,6 +2472,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + do_refine_retval_range(regs, fn->ret_type, func_id, &meta); + err = check_map_func_compatibility(env, meta.map_ptr, func_id); if (err) return err; -- cgit v1.2.3 From afbe1a5b79cd18f0ace9107e6b4cd7aa193e5e52 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:10 -0700 Subject: bpf: remove never-hit branches in verifier adjust_scalar_min_max_vals In verifier function adjust_scalar_min_max_vals, when src_known is false and the opcode is BPF_LSH/BPF_RSH, early return will happen in the function. So remove the branch in handling BPF_LSH/BPF_RSH when src_known is false. Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 988400e283b7..6e3f859b3abf 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2940,10 +2940,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, dst_reg->umin_value <<= umin_val; dst_reg->umax_value <<= umax_val; } - if (src_known) - dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); - else - dst_reg->var_off = tnum_lshift(tnum_unknown, umin_val); + dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val); /* We may learn something more from the var_off */ __update_reg_bounds(dst_reg); break; @@ -2971,11 +2968,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, */ dst_reg->smin_value = S64_MIN; dst_reg->smax_value = S64_MAX; - if (src_known) - dst_reg->var_off = tnum_rshift(dst_reg->var_off, - umin_val); - else - dst_reg->var_off = tnum_rshift(tnum_unknown, umin_val); + dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val); dst_reg->umin_value >>= umax_val; dst_reg->umax_value >>= umin_val; /* We may learn something more from the var_off */ -- cgit v1.2.3 From 9cbe1f5a32dcd6d0508326f7d9098e5bc380a4fe Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:11 -0700 Subject: bpf/verifier: improve register value range tracking with ARSH When helpers like bpf_get_stack returns an int value and later on used for arithmetic computation, the LSH and ARSH operations are often required to get proper sign extension into 64-bit. For example, without this patch: 54: R0=inv(id=0,umax_value=800) 54: (bf) r8 = r0 55: R0=inv(id=0,umax_value=800) R8_w=inv(id=0,umax_value=800) 55: (67) r8 <<= 32 56: R8_w=inv(id=0,umax_value=3435973836800,var_off=(0x0; 0x3ff00000000)) 56: (c7) r8 s>>= 32 57: R8=inv(id=0) With this patch: 54: R0=inv(id=0,umax_value=800) 54: (bf) r8 = r0 55: R0=inv(id=0,umax_value=800) R8_w=inv(id=0,umax_value=800) 55: (67) r8 <<= 32 56: R8_w=inv(id=0,umax_value=3435973836800,var_off=(0x0; 0x3ff00000000)) 56: (c7) r8 s>>= 32 57: R8=inv(id=0, umax_value=800,var_off=(0x0; 0x3ff)) With better range of "R8", later on when "R8" is added to other register, e.g., a map pointer or scalar-value register, the better register range can be derived and verifier failure may be avoided. In our later example, ...... usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); if (usize < 0) return 0; ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); ...... Without improving ARSH value range tracking, the register representing "max_len - usize" will have smin_value equal to S64_MIN and will be rejected by verifier. Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- include/linux/tnum.h | 4 +++- kernel/bpf/tnum.c | 10 ++++++++++ kernel/bpf/verifier.c | 23 +++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/include/linux/tnum.h b/include/linux/tnum.h index 0d2d3da46139..c7dc2b5902c0 100644 --- a/include/linux/tnum.h +++ b/include/linux/tnum.h @@ -23,8 +23,10 @@ struct tnum tnum_range(u64 min, u64 max); /* Arithmetic and logical ops */ /* Shift a tnum left (by a fixed shift) */ struct tnum tnum_lshift(struct tnum a, u8 shift); -/* Shift a tnum right (by a fixed shift) */ +/* Shift (rsh) a tnum right (by a fixed shift) */ struct tnum tnum_rshift(struct tnum a, u8 shift); +/* Shift (arsh) a tnum right (by a fixed min_shift) */ +struct tnum tnum_arshift(struct tnum a, u8 min_shift); /* Add two tnums, return @a + @b */ struct tnum tnum_add(struct tnum a, struct tnum b); /* Subtract two tnums, return @a - @b */ diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 1f4bf68c12db..938d41211be7 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -43,6 +43,16 @@ struct tnum tnum_rshift(struct tnum a, u8 shift) return TNUM(a.value >> shift, a.mask >> shift); } +struct tnum tnum_arshift(struct tnum a, u8 min_shift) +{ + /* if a.value is negative, arithmetic shifting by minimum shift + * will have larger negative offset compared to more shifting. + * If a.value is nonnegative, arithmetic shifting by minimum shift + * will have larger positive offset compare to more shifting. + */ + return TNUM((s64)a.value >> min_shift, (s64)a.mask >> min_shift); +} + struct tnum tnum_add(struct tnum a, struct tnum b) { u64 sm, sv, sigma, chi, mu; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6e3f859b3abf..712d8655e916 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2974,6 +2974,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, /* We may learn something more from the var_off */ __update_reg_bounds(dst_reg); break; + case BPF_ARSH: + if (umax_val >= insn_bitness) { + /* Shifts greater than 31 or 63 are undefined. + * This includes shifts by a negative number. + */ + mark_reg_unknown(env, regs, insn->dst_reg); + break; + } + + /* Upon reaching here, src_known is true and + * umax_val is equal to umin_val. + */ + dst_reg->smin_value >>= umin_val; + dst_reg->smax_value >>= umin_val; + dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val); + + /* blow away the dst_reg umin_value/umax_value and rely on + * dst_reg var_off to refine the result. + */ + dst_reg->umin_value = 0; + dst_reg->umax_value = U64_MAX; + __update_reg_bounds(dst_reg); + break; default: mark_reg_unknown(env, regs, insn->dst_reg); break; -- cgit v1.2.3 From de2ff05f48afcde816ff4edb217417f62f624ab5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:12 -0700 Subject: tools/bpf: add bpf_get_stack helper to tools headers The tools header file bpf.h is synced with kernel uapi bpf.h. The new helper is also added to bpf_helpers.h. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 42 +++++++++++++++++++++++++++++-- tools/testing/selftests/bpf/bpf_helpers.h | 2 ++ 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index da77a9388947..1afb606a18b9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1767,6 +1767,40 @@ union bpf_attr { * **CONFIG_XFRM** configuration option. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *ctx*, which is a pointer + * to the context on which the tracing program is executed. + * To store the stacktrace, the bpf program provides *buf* with + * a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack= + * + * Return + * a non-negative value equal to or less than size on success, or + * a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -1835,7 +1869,8 @@ union bpf_attr { FN(msg_pull_data), \ FN(bind), \ FN(xdp_adjust_tail), \ - FN(skb_get_xfrm_state), + FN(skb_get_xfrm_state), \ + FN(get_stack), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -1869,11 +1904,14 @@ enum bpf_func_id { /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ #define BPF_F_TUNINFO_IPV6 (1ULL << 0) -/* BPF_FUNC_get_stackid flags. */ +/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ #define BPF_F_SKIP_FIELD_MASK 0xffULL #define BPF_F_USER_STACK (1ULL << 8) +/* flags used by BPF_FUNC_get_stackid only. */ #define BPF_F_FAST_STACK_CMP (1ULL << 9) #define BPF_F_REUSE_STACKID (1ULL << 10) +/* flags used by BPF_FUNC_get_stack only. */ +#define BPF_F_USER_BUILD_ID (1ULL << 11) /* BPF_FUNC_skb_set_tunnel_key flags. */ #define BPF_F_ZERO_CSUM_TX (1ULL << 1) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 69d7b918e66a..265f8e0e8ada 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -101,6 +101,8 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, int size, int flags) = (void *) BPF_FUNC_skb_get_xfrm_state; +static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = + (void *) BPF_FUNC_get_stack; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions -- cgit v1.2.3 From 28dbf861deacb0321604bf1c5e1ccc34dd215669 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:13 -0700 Subject: samples/bpf: move common-purpose trace functions to selftests There is no functionality change in this patch. The common-purpose trace functions, including perf_event polling and ksym lookup, are moved from trace_output_user.c and bpf_load.c to selftests/bpf/trace_helpers.c so that these function can be reused later in selftests. Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- samples/bpf/Makefile | 11 +- samples/bpf/bpf_load.c | 63 ---------- samples/bpf/bpf_load.h | 7 -- samples/bpf/offwaketime_user.c | 1 + samples/bpf/sampleip_user.c | 1 + samples/bpf/spintest_user.c | 1 + samples/bpf/trace_event_user.c | 1 + samples/bpf/trace_output_user.c | 110 ++--------------- tools/testing/selftests/bpf/trace_helpers.c | 180 ++++++++++++++++++++++++++++ tools/testing/selftests/bpf/trace_helpers.h | 23 ++++ 10 files changed, 223 insertions(+), 175 deletions(-) create mode 100644 tools/testing/selftests/bpf/trace_helpers.c create mode 100644 tools/testing/selftests/bpf/trace_helpers.h diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index b853581592fd..5e31770ac087 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -49,6 +49,7 @@ hostprogs-y += xdp_adjust_tail # Libbpf dependencies LIBBPF := ../../tools/lib/bpf/bpf.o ../../tools/lib/bpf/nlattr.o CGROUP_HELPERS := ../../tools/testing/selftests/bpf/cgroup_helpers.o +TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o test_lru_dist-objs := test_lru_dist.o $(LIBBPF) sock_example-objs := sock_example.o $(LIBBPF) @@ -65,10 +66,10 @@ tracex6-objs := bpf_load.o $(LIBBPF) tracex6_user.o tracex7-objs := bpf_load.o $(LIBBPF) tracex7_user.o load_sock_ops-objs := bpf_load.o $(LIBBPF) load_sock_ops.o test_probe_write_user-objs := bpf_load.o $(LIBBPF) test_probe_write_user_user.o -trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o +trace_output-objs := bpf_load.o $(LIBBPF) trace_output_user.o $(TRACE_HELPERS) lathist-objs := bpf_load.o $(LIBBPF) lathist_user.o -offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o -spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o +offwaketime-objs := bpf_load.o $(LIBBPF) offwaketime_user.o $(TRACE_HELPERS) +spintest-objs := bpf_load.o $(LIBBPF) spintest_user.o $(TRACE_HELPERS) map_perf_test-objs := bpf_load.o $(LIBBPF) map_perf_test_user.o test_overhead-objs := bpf_load.o $(LIBBPF) test_overhead_user.o test_cgrp2_array_pin-objs := $(LIBBPF) test_cgrp2_array_pin.o @@ -82,8 +83,8 @@ xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o xdp_router_ipv4-objs := bpf_load.o $(LIBBPF) xdp_router_ipv4_user.o test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) $(CGROUP_HELPERS) \ test_current_task_under_cgroup_user.o -trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o -sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o +trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o $(TRACE_HELPERS) +sampleip-objs := bpf_load.o $(LIBBPF) sampleip_user.o $(TRACE_HELPERS) tc_l2_redirect-objs := bpf_load.o $(LIBBPF) tc_l2_redirect_user.o lwt_len_hist-objs := bpf_load.o $(LIBBPF) lwt_len_hist_user.o xdp_tx_iptunnel-objs := bpf_load.o $(LIBBPF) xdp_tx_iptunnel_user.o diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index feca497d6afd..a27ef3c42e4e 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -648,66 +648,3 @@ void read_trace_pipe(void) } } } - -#define MAX_SYMS 300000 -static struct ksym syms[MAX_SYMS]; -static int sym_cnt; - -static int ksym_cmp(const void *p1, const void *p2) -{ - return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; -} - -int load_kallsyms(void) -{ - FILE *f = fopen("/proc/kallsyms", "r"); - char func[256], buf[256]; - char symbol; - void *addr; - int i = 0; - - if (!f) - return -ENOENT; - - while (!feof(f)) { - if (!fgets(buf, sizeof(buf), f)) - break; - if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) - break; - if (!addr) - continue; - syms[i].addr = (long) addr; - syms[i].name = strdup(func); - i++; - } - sym_cnt = i; - qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); - return 0; -} - -struct ksym *ksym_search(long key) -{ - int start = 0, end = sym_cnt; - int result; - - while (start < end) { - size_t mid = start + (end - start) / 2; - - result = key - syms[mid].addr; - if (result < 0) - end = mid; - else if (result > 0) - start = mid + 1; - else - return &syms[mid]; - } - - if (start >= 1 && syms[start - 1].addr < key && - key < syms[start].addr) - /* valid ksym */ - return &syms[start - 1]; - - /* out of range. return _stext */ - return &syms[0]; -} - diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index 453c200b389b..2c3d0b448632 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -54,12 +54,5 @@ int load_bpf_file(char *path); int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map); void read_trace_pipe(void); -struct ksym { - long addr; - char *name; -}; - -int load_kallsyms(void); -struct ksym *ksym_search(long key); int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); #endif diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 512f87a5fd20..f06063af9fcb 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -17,6 +17,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "trace_helpers.h" #define PRINT_RAW_ADDR 0 diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c index 4ed690b907ff..60c2b73d1b4d 100644 --- a/samples/bpf/sampleip_user.c +++ b/samples/bpf/sampleip_user.c @@ -22,6 +22,7 @@ #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" +#include "trace_helpers.h" #define DEFAULT_FREQ 99 #define DEFAULT_SECS 5 diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index 3d736219a31c..8d3e9cfa1909 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -7,6 +7,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include "trace_helpers.h" int main(int ac, char **argv) { diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index 56f7a259a7c9..1fa1becfa641 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -21,6 +21,7 @@ #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" +#include "trace_helpers.h" #define SAMPLE_FREQ 50 diff --git a/samples/bpf/trace_output_user.c b/samples/bpf/trace_output_user.c index ccca1e348017..5e78c2ecd08d 100644 --- a/samples/bpf/trace_output_user.c +++ b/samples/bpf/trace_output_user.c @@ -21,100 +21,10 @@ #include "libbpf.h" #include "bpf_load.h" #include "perf-sys.h" +#include "trace_helpers.h" static int pmu_fd; -int page_size; -int page_cnt = 8; -volatile struct perf_event_mmap_page *header; - -typedef void (*print_fn)(void *data, int size); - -static int perf_event_mmap(int fd) -{ - void *base; - int mmap_size; - - page_size = getpagesize(); - mmap_size = page_size * (page_cnt + 1); - - base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (base == MAP_FAILED) { - printf("mmap err\n"); - return -1; - } - - header = base; - return 0; -} - -static int perf_event_poll(int fd) -{ - struct pollfd pfd = { .fd = fd, .events = POLLIN }; - - return poll(&pfd, 1, 1000); -} - -struct perf_event_sample { - struct perf_event_header header; - __u32 size; - char data[]; -}; - -static void perf_event_read(print_fn fn) -{ - __u64 data_tail = header->data_tail; - __u64 data_head = header->data_head; - __u64 buffer_size = page_cnt * page_size; - void *base, *begin, *end; - char buf[256]; - - asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ - if (data_head == data_tail) - return; - - base = ((char *)header) + page_size; - - begin = base + data_tail % buffer_size; - end = base + data_head % buffer_size; - - while (begin != end) { - struct perf_event_sample *e; - - e = begin; - if (begin + e->header.size > base + buffer_size) { - long len = base + buffer_size - begin; - - assert(len < e->header.size); - memcpy(buf, begin, len); - memcpy(buf + len, base, e->header.size - len); - e = (void *) buf; - begin = base + e->header.size - len; - } else if (begin + e->header.size == base + buffer_size) { - begin = base; - } else { - begin += e->header.size; - } - - if (e->header.type == PERF_RECORD_SAMPLE) { - fn(e->data, e->size); - } else if (e->header.type == PERF_RECORD_LOST) { - struct { - struct perf_event_header header; - __u64 id; - __u64 lost; - } *lost = (void *) e; - printf("lost %lld events\n", lost->lost); - } else { - printf("unknown event type=%d size=%d\n", - e->header.type, e->header.size); - } - } - - __sync_synchronize(); /* smp_mb() */ - header->data_tail = data_head; -} - static __u64 time_get_ns(void) { struct timespec ts; @@ -127,7 +37,7 @@ static __u64 start_time; #define MAX_CNT 100000ll -static void print_bpf_output(void *data, int size) +static int print_bpf_output(void *data, int size) { static __u64 cnt; struct { @@ -138,7 +48,7 @@ static void print_bpf_output(void *data, int size) if (e->cookie != 0x12345678) { printf("BUG pid %llx cookie %llx sized %d\n", e->pid, e->cookie, size); - kill(0, SIGINT); + return PERF_EVENT_ERROR; } cnt++; @@ -146,8 +56,10 @@ static void print_bpf_output(void *data, int size) if (cnt == MAX_CNT) { printf("recv %lld events per sec\n", MAX_CNT * 1000000000ll / (time_get_ns() - start_time)); - kill(0, SIGINT); + return PERF_EVENT_DONE; } + + return PERF_EVENT_CONT; } static void test_bpf_perf_event(void) @@ -170,6 +82,7 @@ int main(int argc, char **argv) { char filename[256]; FILE *f; + int ret; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); @@ -187,10 +100,7 @@ int main(int argc, char **argv) (void) f; start_time = time_get_ns(); - for (;;) { - perf_event_poll(pmu_fd); - perf_event_read(print_bpf_output); - } - - return 0; + ret = perf_event_poller(pmu_fd, print_bpf_output); + kill(0, SIGINT); + return ret; } diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c new file mode 100644 index 000000000000..ad025bd75f1c --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "trace_helpers.h" + +#define MAX_SYMS 300000 +static struct ksym syms[MAX_SYMS]; +static int sym_cnt; + +static int ksym_cmp(const void *p1, const void *p2) +{ + return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; +} + +int load_kallsyms(void) +{ + FILE *f = fopen("/proc/kallsyms", "r"); + char func[256], buf[256]; + char symbol; + void *addr; + int i = 0; + + if (!f) + return -ENOENT; + + while (!feof(f)) { + if (!fgets(buf, sizeof(buf), f)) + break; + if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3) + break; + if (!addr) + continue; + syms[i].addr = (long) addr; + syms[i].name = strdup(func); + i++; + } + sym_cnt = i; + qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); + return 0; +} + +struct ksym *ksym_search(long key) +{ + int start = 0, end = sym_cnt; + int result; + + while (start < end) { + size_t mid = start + (end - start) / 2; + + result = key - syms[mid].addr; + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return &syms[mid]; + } + + if (start >= 1 && syms[start - 1].addr < key && + key < syms[start].addr) + /* valid ksym */ + return &syms[start - 1]; + + /* out of range. return _stext */ + return &syms[0]; +} + +static int page_size; +static int page_cnt = 8; +static volatile struct perf_event_mmap_page *header; + +int perf_event_mmap(int fd) +{ + void *base; + int mmap_size; + + page_size = getpagesize(); + mmap_size = page_size * (page_cnt + 1); + + base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (base == MAP_FAILED) { + printf("mmap err\n"); + return -1; + } + + header = base; + return 0; +} + +static int perf_event_poll(int fd) +{ + struct pollfd pfd = { .fd = fd, .events = POLLIN }; + + return poll(&pfd, 1, 1000); +} + +struct perf_event_sample { + struct perf_event_header header; + __u32 size; + char data[]; +}; + +static int perf_event_read(perf_event_print_fn fn) +{ + __u64 data_tail = header->data_tail; + __u64 data_head = header->data_head; + __u64 buffer_size = page_cnt * page_size; + void *base, *begin, *end; + char buf[256]; + int ret; + + asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */ + if (data_head == data_tail) + return PERF_EVENT_CONT; + + base = ((char *)header) + page_size; + + begin = base + data_tail % buffer_size; + end = base + data_head % buffer_size; + + while (begin != end) { + struct perf_event_sample *e; + + e = begin; + if (begin + e->header.size > base + buffer_size) { + long len = base + buffer_size - begin; + + assert(len < e->header.size); + memcpy(buf, begin, len); + memcpy(buf + len, base, e->header.size - len); + e = (void *) buf; + begin = base + e->header.size - len; + } else if (begin + e->header.size == base + buffer_size) { + begin = base; + } else { + begin += e->header.size; + } + + if (e->header.type == PERF_RECORD_SAMPLE) { + ret = fn(e->data, e->size); + if (ret != PERF_EVENT_CONT) + return ret; + } else if (e->header.type == PERF_RECORD_LOST) { + struct { + struct perf_event_header header; + __u64 id; + __u64 lost; + } *lost = (void *) e; + printf("lost %lld events\n", lost->lost); + } else { + printf("unknown event type=%d size=%d\n", + e->header.type, e->header.size); + } + } + + __sync_synchronize(); /* smp_mb() */ + header->data_tail = data_head; + return PERF_EVENT_CONT; +} + +int perf_event_poller(int fd, perf_event_print_fn output_fn) +{ + int ret; + + for (;;) { + perf_event_poll(fd); + ret = perf_event_read(output_fn); + if (ret != PERF_EVENT_CONT) + return ret; + } + + return PERF_EVENT_DONE; +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h new file mode 100644 index 000000000000..fe3eefd21e86 --- /dev/null +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TRACE_HELPER_H +#define __TRACE_HELPER_H + +struct ksym { + long addr; + char *name; +}; + +int load_kallsyms(void); +struct ksym *ksym_search(long key); + +typedef int (*perf_event_print_fn)(void *data, int size); + +/* return code for perf_event_print_fn */ +#define PERF_EVENT_DONE 0 +#define PERF_EVENT_ERROR -1 +#define PERF_EVENT_CONT -2 + +int perf_event_mmap(int fd); +/* return PERF_EVENT_DONE or PERF_EVENT_ERROR */ +int perf_event_poller(int fd, perf_event_print_fn output_fn); +#endif -- cgit v1.2.3 From 2abe611c5f031f06ab783102e62c0ae9dfd93e80 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:14 -0700 Subject: tools/bpf: add a verifier test case for bpf_get_stack helper and ARSH The test_verifier already has a few ARSH test cases. This patch adds a new test case which takes advantage of newly improved verifier behavior for bpf_get_stack and ARSH. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_verifier.c | 45 +++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 165e9ddfa446..1acafe26498b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -11680,6 +11680,51 @@ static struct bpf_test tests[] = { .errstr = "BPF_XADD stores into R2 packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "bpf_get_stack return R0 within range", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)), + BPF_MOV64_IMM(BPF_REG_4, 256), + BPF_EMIT_CALL(BPF_FUNC_get_stack), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32), + BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16), + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_9), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_9), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_get_stack), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) -- cgit v1.2.3 From 173965fbfba596c02fa128966c2a33cb88afcd7f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:15 -0700 Subject: tools/bpf: add a test for bpf_get_stack with raw tracepoint prog The test attached a raw_tracepoint program to raw_syscalls/sys_enter. It tested to get stack for user space, kernel space and user space with build_id request. It also tested to get user and kernel stack into the same buffer with back-to-back bpf_get_stack helper calls. If jit is not enabled, the user space application will check to ensure that the kernel function for raw_tracepoint ___bpf_prog_run is part of the stack. If jit is enabled, we did not have a reliable way to verify the kernel stack, so just assume the kernel stack is good when the kernel stack size is greater than 0. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 4 +- tools/testing/selftests/bpf/test_get_stack_rawtp.c | 102 ++++++++++++ tools/testing/selftests/bpf/test_progs.c | 172 +++++++++++++++++++-- 3 files changed, 266 insertions(+), 12 deletions(-) create mode 100644 tools/testing/selftests/bpf/test_get_stack_rawtp.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b64a7a39cbc8..9d762184b805 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ - test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o + test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ + test_get_stack_rawtp.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -58,6 +59,7 @@ $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_sock: cgroup_helpers.c $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c +$(OUTPUT)/test_progs: trace_helpers.c .PHONY: force diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c new file mode 100644 index 000000000000..f6d9f238e00a --- /dev/null +++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "bpf_helpers.h" + +/* Permit pretty deep stack traces */ +#define MAX_STACK_RAWTP 100 +struct stack_trace_t { + int pid; + int kern_stack_size; + int user_stack_size; + int user_stack_buildid_size; + __u64 kern_stack[MAX_STACK_RAWTP]; + __u64 user_stack[MAX_STACK_RAWTP]; + struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; +}; + +struct bpf_map_def SEC("maps") perfmap = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") stackdata_map = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct stack_trace_t), + .max_entries = 1, +}; + +/* Allocate per-cpu space twice the needed. For the code below + * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); + * if (usize < 0) + * return 0; + * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); + * + * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64), + * verifier will complain that access "raw_data + usize" + * with size "max_len - usize" may be out of bound. + * The maximum "raw_data + usize" is "raw_data + max_len" + * and the maximum "max_len - usize" is "max_len", verifier + * concludes that the maximum buffer access range is + * "raw_data[0...max_len * 2 - 1]" and hence reject the program. + * + * Doubling the to-be-used max buffer size can fix this verifier + * issue and avoid complicated C programming massaging. + * This is an acceptable workaround since there is one entry here. + */ +struct bpf_map_def SEC("maps") rawdata_map = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(__u32), + .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2, + .max_entries = 1, +}; + +SEC("tracepoint/raw_syscalls/sys_enter") +int bpf_prog1(void *ctx) +{ + int max_len, max_buildid_len, usize, ksize, total_size; + struct stack_trace_t *data; + void *raw_data; + __u32 key = 0; + + data = bpf_map_lookup_elem(&stackdata_map, &key); + if (!data) + return 0; + + max_len = MAX_STACK_RAWTP * sizeof(__u64); + max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id); + data->pid = bpf_get_current_pid_tgid(); + data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack, + max_len, 0); + data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len, + BPF_F_USER_STACK); + data->user_stack_buildid_size = bpf_get_stack( + ctx, data->user_stack_buildid, max_buildid_len, + BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); + bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data)); + + /* write both kernel and user stacks to the same buffer */ + raw_data = bpf_map_lookup_elem(&rawdata_map, &key); + if (!raw_data) + return 0; + + usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); + if (usize < 0) + return 0; + + ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); + if (ksize < 0) + return 0; + + total_size = usize + ksize; + if (total_size > 0 && total_size <= max_len) + bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size); + + return 0; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index eedda98d7bb1..0ddbf34b2438 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -38,8 +38,10 @@ typedef __u16 __sum16; #include "bpf_util.h" #include "bpf_endian.h" #include "bpf_rlimit.h" +#include "trace_helpers.h" static int error_cnt, pass_cnt; +static bool jit_enabled; #define MAGIC_BYTES 123 @@ -391,13 +393,30 @@ static inline __u64 ptr_to_u64(const void *ptr) return (__u64) (unsigned long) ptr; } +static bool is_jit_enabled(void) +{ + const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + bool enabled = false; + int sysctl_fd; + + sysctl_fd = open(jit_sysctl, 0, O_RDONLY); + if (sysctl_fd != -1) { + char tmpc; + + if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) + enabled = (tmpc != '0'); + close(sysctl_fd); + } + + return enabled; +} + static void test_bpf_obj_id(void) { const __u64 array_magic_value = 0xfaceb00c; const __u32 array_key = 0; const int nr_iters = 2; const char *file = "./test_obj_id.o"; - const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; const char *expected_prog_name = "test_obj_id"; const char *expected_map_name = "test_map_id"; const __u64 nsec_per_sec = 1000000000; @@ -414,20 +433,11 @@ static void test_bpf_obj_id(void) char jited_insns[128], xlated_insns[128], zeros[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; struct timespec real_time_ts, boot_time_ts; - int sysctl_fd, jit_enabled = 0, err = 0; + int err = 0; __u64 array_value; uid_t my_uid = getuid(); time_t now, load_time; - sysctl_fd = open(jit_sysctl, 0, O_RDONLY); - if (sysctl_fd != -1) { - char tmpc; - - if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) - jit_enabled = (tmpc != '0'); - close(sysctl_fd); - } - err = bpf_prog_get_fd_by_id(0); CHECK(err >= 0 || errno != ENOENT, "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno); @@ -1204,8 +1214,147 @@ out: return; } +#define MAX_CNT_RAWTP 10ull +#define MAX_STACK_RAWTP 100 +struct get_stack_trace_t { + int pid; + int kern_stack_size; + int user_stack_size; + int user_stack_buildid_size; + __u64 kern_stack[MAX_STACK_RAWTP]; + __u64 user_stack[MAX_STACK_RAWTP]; + struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; +}; + +static int get_stack_print_output(void *data, int size) +{ + bool good_kern_stack = false, good_user_stack = false; + const char *nonjit_func = "___bpf_prog_run"; + struct get_stack_trace_t *e = data; + int i, num_stack; + static __u64 cnt; + struct ksym *ks; + + cnt++; + + if (size < sizeof(struct get_stack_trace_t)) { + __u64 *raw_data = data; + bool found = false; + + num_stack = size / sizeof(__u64); + /* If jit is enabled, we do not have a good way to + * verify the sanity of the kernel stack. So we + * just assume it is good if the stack is not empty. + * This could be improved in the future. + */ + if (jit_enabled) { + found = num_stack > 0; + } else { + for (i = 0; i < num_stack; i++) { + ks = ksym_search(raw_data[i]); + if (strcmp(ks->name, nonjit_func) == 0) { + found = true; + break; + } + } + } + if (found) { + good_kern_stack = true; + good_user_stack = true; + } + } else { + num_stack = e->kern_stack_size / sizeof(__u64); + if (jit_enabled) { + good_kern_stack = num_stack > 0; + } else { + for (i = 0; i < num_stack; i++) { + ks = ksym_search(e->kern_stack[i]); + if (strcmp(ks->name, nonjit_func) == 0) { + good_kern_stack = true; + break; + } + } + } + if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) + good_user_stack = true; + } + if (!good_kern_stack || !good_user_stack) + return PERF_EVENT_ERROR; + + if (cnt == MAX_CNT_RAWTP) + return PERF_EVENT_DONE; + + return PERF_EVENT_CONT; +} + +static void test_get_stack_raw_tp(void) +{ + const char *file = "./test_get_stack_rawtp.o"; + int i, efd, err, prog_fd, pmu_fd, perfmap_fd; + struct perf_event_attr attr = {}; + struct timespec tv = {0, 10}; + __u32 key = 0, duration = 0; + struct bpf_object *obj; + + err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + return; + + efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); + if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + goto close_prog; + + perfmap_fd = bpf_find_map(__func__, obj, "perfmap"); + if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n", + perfmap_fd, errno)) + goto close_prog; + + err = load_kallsyms(); + if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno)) + goto close_prog; + + attr.sample_type = PERF_SAMPLE_RAW; + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_BPF_OUTPUT; + pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/, + -1/*group_fd*/, 0); + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, + errno)) + goto close_prog; + + err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY); + if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err, + errno)) + goto close_prog; + + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n", + err, errno)) + goto close_prog; + + err = perf_event_mmap(pmu_fd); + if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno)) + goto close_prog; + + /* trigger some syscall action */ + for (i = 0; i < MAX_CNT_RAWTP; i++) + nanosleep(&tv, NULL); + + err = perf_event_poller(pmu_fd, get_stack_print_output); + if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno)) + goto close_prog; + + goto close_prog_noerr; +close_prog: + error_cnt++; +close_prog_noerr: + bpf_object__close(obj); +} + int main(void) { + jit_enabled = is_jit_enabled(); + test_pkt_access(); test_xdp(); test_xdp_adjust_tail(); @@ -1219,6 +1368,7 @@ int main(void) test_stacktrace_map(); test_stacktrace_build_id(); test_stacktrace_map_raw_tp(); + test_get_stack_raw_tp(); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; -- cgit v1.2.3 From 79b45350131057250236e162ce7b3c0b291dc0a4 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 28 Apr 2018 22:28:16 -0700 Subject: tools/bpf: add a test for bpf_get_stack with tracepoint prog The test_stacktrace_map and test_stacktrace_build_id are enhanced to call bpf_get_stack in the helper to get the stack trace as well. The stack traces from bpf_get_stack and bpf_get_stackid are compared to ensure that for the same stack as represented as the same hash, their ip addresses or build id's must be the same. Acked-by: Alexei Starovoitov Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_progs.c | 70 ++++++++++++++++++++-- .../selftests/bpf/test_stacktrace_build_id.c | 20 ++++++- tools/testing/selftests/bpf/test_stacktrace_map.c | 19 +++++- 3 files changed, 98 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0ddbf34b2438..aa336f0abebc 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -906,11 +906,47 @@ static int compare_map_keys(int map1_fd, int map2_fd) return 0; } +static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len) +{ + __u32 key, next_key, *cur_key_p, *next_key_p; + char *val_buf1, *val_buf2; + int i, err = 0; + + val_buf1 = malloc(stack_trace_len); + val_buf2 = malloc(stack_trace_len); + cur_key_p = NULL; + next_key_p = &key; + while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) { + err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1); + if (err) + goto out; + err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2); + if (err) + goto out; + for (i = 0; i < stack_trace_len; i++) { + if (val_buf1[i] != val_buf2[i]) { + err = -1; + goto out; + } + } + key = *next_key_p; + cur_key_p = &key; + next_key_p = &next_key; + } + if (errno != ENOENT) + err = -1; + +out: + free(val_buf1); + free(val_buf2); + return err; +} + static void test_stacktrace_map() { - int control_map_fd, stackid_hmap_fd, stackmap_fd; + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; const char *file = "./test_stacktrace_map.o"; - int bytes, efd, err, pmu_fd, prog_fd; + int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; struct perf_event_attr attr = {}; __u32 key, val, duration = 0; struct bpf_object *obj; @@ -966,6 +1002,10 @@ static void test_stacktrace_map() if (stackmap_fd < 0) goto disable_pmu; + stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); + if (stack_amap_fd < 0) + goto disable_pmu; + /* give some time for bpf program run */ sleep(1); @@ -987,6 +1027,12 @@ static void test_stacktrace_map() "err %d errno %d\n", err, errno)) goto disable_pmu_noerr; + stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); + err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); + if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap", + "err %d errno %d\n", err, errno)) + goto disable_pmu_noerr; + goto disable_pmu_noerr; disable_pmu: error_cnt++; @@ -1080,9 +1126,9 @@ err: static void test_stacktrace_build_id(void) { - int control_map_fd, stackid_hmap_fd, stackmap_fd; + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; const char *file = "./test_stacktrace_build_id.o"; - int bytes, efd, err, pmu_fd, prog_fd; + int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len; struct perf_event_attr attr = {}; __u32 key, previous_key, val, duration = 0; struct bpf_object *obj; @@ -1147,6 +1193,11 @@ static void test_stacktrace_build_id(void) err, errno)) goto disable_pmu; + stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); + if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", + "err %d errno %d\n", err, errno)) + goto disable_pmu; + assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); @@ -1198,8 +1249,15 @@ static void test_stacktrace_build_id(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); - CHECK(build_id_matches < 1, "build id match", - "Didn't find expected build ID from the map"); + if (CHECK(build_id_matches < 1, "build id match", + "Didn't find expected build ID from the map")) + goto disable_pmu; + + stack_trace_len = PERF_MAX_STACK_DEPTH + * sizeof(struct bpf_stack_build_id); + err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); + CHECK(err, "compare_stack_ips stackmap vs. stack_amap", + "err %d errno %d\n", err, errno); disable_pmu: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c index b755bd783ce5..d86c281e957f 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c @@ -19,7 +19,7 @@ struct bpf_map_def SEC("maps") stackid_hmap = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(__u32), .value_size = sizeof(__u32), - .max_entries = 10000, + .max_entries = 16384, }; struct bpf_map_def SEC("maps") stackmap = { @@ -31,6 +31,14 @@ struct bpf_map_def SEC("maps") stackmap = { .map_flags = BPF_F_STACK_BUILD_ID, }; +struct bpf_map_def SEC("maps") stack_amap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH, + .max_entries = 128, +}; + /* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ struct random_urandom_args { unsigned long long pad; @@ -42,7 +50,10 @@ struct random_urandom_args { SEC("tracepoint/random/urandom_read") int oncpu(struct random_urandom_args *args) { + __u32 max_len = sizeof(struct bpf_stack_build_id) + * PERF_MAX_STACK_DEPTH; __u32 key = 0, val = 0, *value_p; + void *stack_p; value_p = bpf_map_lookup_elem(&control_map, &key); if (value_p && *value_p) @@ -50,8 +61,13 @@ int oncpu(struct random_urandom_args *args) /* The size of stackmap and stackid_hmap should be the same */ key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK); - if ((int)key >= 0) + if ((int)key >= 0) { bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + stack_p = bpf_map_lookup_elem(&stack_amap, &key); + if (stack_p) + bpf_get_stack(args, stack_p, max_len, + BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); + } return 0; } diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c index 76d85c5d08bd..af111af7ca1a 100644 --- a/tools/testing/selftests/bpf/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/test_stacktrace_map.c @@ -19,14 +19,21 @@ struct bpf_map_def SEC("maps") stackid_hmap = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(__u32), .value_size = sizeof(__u32), - .max_entries = 10000, + .max_entries = 16384, }; struct bpf_map_def SEC("maps") stackmap = { .type = BPF_MAP_TYPE_STACK_TRACE, .key_size = sizeof(__u32), .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, - .max_entries = 10000, + .max_entries = 16384, +}; + +struct bpf_map_def SEC("maps") stack_amap = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH, + .max_entries = 16384, }; /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ @@ -44,7 +51,9 @@ struct sched_switch_args { SEC("tracepoint/sched/sched_switch") int oncpu(struct sched_switch_args *ctx) { + __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); __u32 key = 0, val = 0, *value_p; + void *stack_p; value_p = bpf_map_lookup_elem(&control_map, &key); if (value_p && *value_p) @@ -52,8 +61,12 @@ int oncpu(struct sched_switch_args *ctx) /* The size of stackmap and stackid_hmap should be the same */ key = bpf_get_stackid(ctx, &stackmap, 0); - if ((int)key >= 0) + if ((int)key >= 0) { bpf_map_update_elem(&stackid_hmap, &key, &val, 0); + stack_p = bpf_map_lookup_elem(&stack_amap, &key); + if (stack_p) + bpf_get_stack(ctx, stack_p, max_len, 0); + } return 0; } -- cgit v1.2.3