From 485ec2ea9cf556e9c120e07961b7b459d776a115 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Thu, 23 Jan 2020 17:34:38 +0530 Subject: bpf, devmap: Pass lockdep expression to RCU lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit head is traversed using hlist_for_each_entry_rcu outside an RCU read-side critical section but under the protection of dtab->index_lock. Hence, add corresponding lockdep expression to silence false-positive lockdep warnings, and harden RCU lists. Fixes: 6f9d451ab1a3 ("xdp: Add devmap_hash map type for looking up devices by hashed index") Signed-off-by: Amol Grover Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200123120437.26506-1-frextrite@gmail.com --- kernel/bpf/devmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index de630f980282..f3a44f6ca86e 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -263,7 +263,8 @@ struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) struct hlist_head *head = dev_map_index_hash(dtab, key); struct bpf_dtab_netdev *dev; - hlist_for_each_entry_rcu(dev, head, index_hlist) + hlist_for_each_entry_rcu(dev, head, index_hlist, + lockdep_is_held(&dtab->index_lock)) if (dev->idx == key) return dev; -- cgit v1.2.3 From 1a6fa10640d97e16d4184fa1c24aa8c3337d4653 Mon Sep 17 00:00:00 2001 From: John Sperbeck Date: Thu, 23 Jan 2020 15:51:44 -0800 Subject: selftests/bpf: Initialize duration variable before using The 'duration' variable is referenced in the CHECK() macro, and there are some uses of the macro before 'duration' is set. The clang compiler (validly) complains about this. 
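A minimal userspace reduction of the pattern (hypothetical code, not the actual harness) shows the shape of the problem: the macro expands to a read of 'duration' before any assignment.

    #include <stdio.h>

    /* simplified stand-in for the selftests' CHECK() macro, which
     * reads 'duration' from the enclosing scope */
    #define CHECK(cond, tag) \
        ({ if (cond) printf("%s: duration %u\n", tag, duration); !!(cond); })

    int main(void)
    {
        unsigned int duration;       /* clang: used uninitialized below */
        int err = -1;                /* stand-in for a failed prog load */

        if (CHECK(err, "prog_load")) /* macro reads 'duration' here */
            return 1;
        duration = 0;                /* too late for the first CHECK() */
        return 0;
    }

Initializing the variable at declaration, as the patch does, silences the warning without changing behavior.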
Sample error: .../selftests/bpf/prog_tests/fexit_test.c:23:6: warning: variable 'duration' is uninitialized when used here [-Wuninitialized] if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .../selftests/bpf/test_progs.h:134:25: note: expanded from macro 'CHECK' if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ _CHECK(condition, tag, duration, format) ^~~~~~~~ Signed-off-by: John Sperbeck Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200123235144.93610-1-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/fentry_test.c | 2 +- tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c | 2 +- tools/testing/selftests/bpf/prog_tests/fexit_test.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index e1a379f5f7d2..5cc06021f27d 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -9,7 +9,7 @@ void test_fentry_test(void) struct test_pkt_access *pkt_skel = NULL; struct fentry_test *fentry_skel = NULL; int err, pkt_fd, i; - __u32 duration, retval; + __u32 duration = 0, retval; __u64 *result; pkt_skel = test_pkt_access__open_and_load(); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index db5c74d2ce6d..cde463af7071 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -11,7 +11,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, int err, pkt_fd, i; struct bpf_link **link = NULL; struct bpf_program **prog = NULL; - __u32 duration, retval; + __u32 duration = 0, retval; struct bpf_map *data_map; const int zero = 0; u64 *result = NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index f99013222c74..d2c3655dd7a3 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -13,7 +13,7 @@ void test_fexit_test(void) int err, pkt_fd, kfree_skb_fd, i; struct bpf_link *link[6] = {}; struct bpf_program *prog[6]; - __u32 duration, retval; + __u32 duration = 0, retval; struct bpf_map *data_map; const int zero = 0; u64 result[6]; -- cgit v1.2.3 From 03506297d205e5fa25b5ead0f6338b5a4a996a93 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Jan 2020 21:41:48 -0800 Subject: selftests/bpf: Improve bpftool changes detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detect when bpftool source code changes and trigger rebuild within selftests/bpf Makefile. Also fix few small formatting problems. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200124054148.2455060-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 5f41f5bd8033..257a1aaaa37d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -155,16 +155,17 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) -$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BUILD_DIR)/bpftool - $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ - OUTPUT=$(BUILD_DIR)/bpftool/ \ +$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ + $(BPFOBJ) | $(BUILD_DIR)/bpftool + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ + OUTPUT=$(BUILD_DIR)/bpftool/ \ prefix= DESTDIR=$(SCRATCH_DIR)/ install -$(BPFOBJ): $(wildcard $(BPFDIR)/*.c $(BPFDIR)/*.h $(BPFDIR)/Makefile) \ +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): $(call msg,MKDIR,,$@) -- cgit v1.2.3 From 41258289a8e9e3e110bab316e0aeded25fa8beb6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Jan 2020 21:43:17 -0800 Subject: bpftool: Print function linkage in BTF dump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add printing out BTF_KIND_FUNC's linkage. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20200124054317.2459436-1-andriin@fb.com --- tools/bpf/bpftool/btf.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 4ba90d81b6a1..b3745ed711ba 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -77,6 +77,20 @@ static const char *btf_var_linkage_str(__u32 linkage) } } +static const char *btf_func_linkage_str(const struct btf_type *t) +{ + switch (btf_vlen(t)) { + case BTF_FUNC_STATIC: + return "static"; + case BTF_FUNC_GLOBAL: + return "global"; + case BTF_FUNC_EXTERN: + return "extern"; + default: + return "(unknown)"; + } +} + static const char *btf_str(const struct btf *btf, __u32 off) { if (!off) @@ -231,12 +245,17 @@ static int dump_btf_type(const struct btf *btf, __u32 id, printf(" fwd_kind=%s", fwd_kind); break; } - case BTF_KIND_FUNC: - if (json_output) + case BTF_KIND_FUNC: { + const char *linkage = btf_func_linkage_str(t); + + if (json_output) { jsonw_uint_field(w, "type_id", t->type); - else - printf(" type_id=%u", t->type); + jsonw_string_field(w, "linkage", linkage); + } else { + printf(" type_id=%u linkage=%s", t->type, linkage); + } break; + } case BTF_KIND_FUNC_PROTO: { const struct btf_param *p = (const void *)(t + 1); __u16 vlen = BTF_INFO_VLEN(t->info); -- cgit v1.2.3 From c31dbb1e41d1857b403f9bf58c87f5898519a0bc Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 24 Jan 2020 11:27:51 +0000 Subject: selftests: bpf: Use a temporary file in test_sockmap Use a proper temporary file for sendpage tests. 
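The pattern the patch moves to combines tmpfile() with fileno() so no named file ever exists; a sketch, assuming an already-connected socket ('sock' and the payload are illustrative):

    #include <stdio.h>
    #include <sys/sendfile.h>

    /* sketch: write a payload into an unlinked temp file and hand its
     * descriptor to sendfile(); the file vanishes on fclose() */
    static int sendpage_once(int sock, const char *buf, size_t len)
    {
        FILE *file = tmpfile();
        ssize_t sent = -1;

        if (!file)
            return -1;
        if (fwrite(buf, 1, len, file) == len) {
            fflush(file);
            fseek(file, 0, SEEK_SET);
            /* fileno() replaces the old reopen-by-name step */
            sent = sendfile(sock, fileno(file), NULL, len);
        }
        fclose(file);                /* single cleanup path */
        return sent == (ssize_t)len ? 0 : -1;
    }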
This means that running the tests doesn't clutter the working directory, and allows running the test on read-only filesystems. Fixes: 16962b2404ac ("bpf: sockmap, add selftests") Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200124112754.19664-2-lmb@cloudflare.com --- tools/testing/selftests/bpf/test_sockmap.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 4a851513c842..779e11da979c 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -331,7 +331,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, FILE *file; int i, fp; - file = fopen(".sendpage_tst.tmp", "w+"); + file = tmpfile(); if (!file) { perror("create file for sendpage"); return 1; @@ -340,13 +340,8 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, fwrite(&k, sizeof(char), 1, file); fflush(file); fseek(file, 0, SEEK_SET); - fclose(file); - fp = open(".sendpage_tst.tmp", O_RDONLY); - if (fp < 0) { - perror("reopen file for sendpage"); - return 1; - } + fp = fileno(file); clock_gettime(CLOCK_MONOTONIC, &s->start); for (i = 0; i < cnt; i++) { @@ -354,11 +349,11 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, if (!drop && sent < 0) { perror("send loop error"); - close(fp); + fclose(file); return sent; } else if (drop && sent >= 0) { printf("sendpage loop error expected: %i\n", sent); - close(fp); + fclose(file); return -EIO; } @@ -366,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, s->bytes_sent += sent; } clock_gettime(CLOCK_MONOTONIC, &s->end); - close(fp); + fclose(file); return 0; } -- cgit v1.2.3 From 8bec4f665e0baecb5f1b683379fc10b3745eb612 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 24 Jan 2020 11:27:52 +0000 Subject: selftests: bpf: Ignore FIN packets for reuseport tests The reuseport tests currently suffer from a race condition: FIN packets count towards DROP_ERR_SKB_DATA, since they don't contain a valid struct cmd. Tests will spuriously fail depending on whether check_results is called before or after the FIN is processed. Exit the BPF program early if FIN is set. Fixes: 91134d849a0e ("bpf: Test BPF_PROG_TYPE_SK_REUSEPORT") Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200124112754.19664-3-lmb@cloudflare.com --- tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index d69a1f2bbbfd..26e77dcc7e91 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md) data_check.skb_ports[0] = th->source; data_check.skb_ports[1] = th->dest; + if (th->fin) + /* The connection is being torn down at the end of a + * test. It can't contain a cmd, so return early. 
+ */ + return SK_PASS; + if ((th->doff << 2) + sizeof(*cmd) > data_check.len) GOTO_DONE(DROP_ERR_SKB_DATA); if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy, -- cgit v1.2.3 From 603fba9dfd0b2e2a844ece9ed98ce874c38aa98e Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 24 Jan 2020 11:27:53 +0000 Subject: selftests: bpf: Make reuseport test output more legible Include the name of the mismatching result in human readable format when reporting an error. The new output looks like the following: unexpected result result: [1, 0, 0, 0, 0, 0] expected: [0, 0, 0, 0, 0, 0] mismatch on DROP_ERR_INNER_MAP (bpf_prog_linum:153) check_results:FAIL:382 Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200124112754.19664-4-lmb@cloudflare.com --- .../selftests/bpf/prog_tests/select_reuseport.c | 28 ++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 2c37ae7dc214..e7e56929751c 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -316,6 +316,26 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, expected.len, result.len, get_linum()); } +static const char *result_to_str(enum result res) +{ + switch (res) { + case DROP_ERR_INNER_MAP: + return "DROP_ERR_INNER_MAP"; + case DROP_ERR_SKB_DATA: + return "DROP_ERR_SKB_DATA"; + case DROP_ERR_SK_SELECT_REUSEPORT: + return "DROP_ERR_SK_SELECT_REUSEPORT"; + case DROP_MISC: + return "DROP_MISC"; + case PASS: + return "PASS"; + case PASS_ERR_SK_SELECT_REUSEPORT: + return "PASS_ERR_SK_SELECT_REUSEPORT"; + default: + return "UNKNOWN"; + } +} + static void check_results(void) { __u32 results[NR_RESULTS]; @@ -351,10 +371,10 @@ static void check_results(void) printf(", %u", expected_results[i]); printf("]\n"); - RET_IF(expected_results[broken] != results[broken], - "unexpected result", - "expected_results[%u] != results[%u] bpf_prog_linum:%ld\n", - broken, broken, get_linum()); + printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken), + get_linum()); + + CHECK_FAIL(true); } static int send_data(int type, sa_family_t family, void *data, size_t len, -- cgit v1.2.3 From 51bad0f05616c43d6d34b0a19bcc9bdab8e8fb39 Mon Sep 17 00:00:00 2001 From: Lorenz Bauer Date: Fri, 24 Jan 2020 11:27:54 +0000 Subject: selftests: bpf: Reset global state between reuseport test runs Currently, there is a lot of false positives if a single reuseport test fails. This is because expected_results and the result map are not cleared. Zero both after individual test runs, which fixes the mentioned false positives. 
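The reset amounts to a memset of the userspace array plus an overwrite of every map slot; a sketch using the libbpf calls the test already links against (names are illustrative):

    #include <string.h>
    #include <linux/types.h>
    #include <bpf/bpf.h>

    /* sketch: zero local expectations and the BPF-side result counters */
    static void reset_results(int result_map, __u32 *expected, int nr)
    {
        __u32 zero = 0;
        int i;

        memset(expected, 0, nr * sizeof(*expected));
        for (i = 0; i < nr; i++)
            /* BPF_ANY: create the element or overwrite it */
            bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
    }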
Fixes: 91134d849a0e ("bpf: Test BPF_PROG_TYPE_SK_REUSEPORT") Signed-off-by: Lorenz Bauer Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Sitnicki Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200124112754.19664-5-lmb@cloudflare.com --- .../testing/selftests/bpf/prog_tests/select_reuseport.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index e7e56929751c..098bcae5f827 100644 --- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -33,7 +33,7 @@ #define REUSEPORT_ARRAY_SIZE 32 static int result_map, tmp_index_ovr_map, linum_map, data_check_map; -static enum result expected_results[NR_RESULTS]; +static __u32 expected_results[NR_RESULTS]; static int sk_fds[REUSEPORT_ARRAY_SIZE]; static int reuseport_array = -1, outer_map = -1; static int select_by_skb_data_prog; @@ -697,7 +697,19 @@ static void setup_per_test(int type, sa_family_t family, bool inany, static void cleanup_per_test(bool no_inner_map) { - int i, err; + int i, err, zero = 0; + + memset(expected_results, 0, sizeof(expected_results)); + + for (i = 0; i < NR_RESULTS; i++) { + err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY); + RET_IF(err, "reset elem in result_map", + "i:%u err:%d errno:%d\n", i, err, errno); + } + + err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY); + RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n", + err, errno); for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) close(sk_fds[i]); -- cgit v1.2.3 From d7a252708dbc950ca2064310217e8b9f85846e2f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 23 Jan 2020 21:38:37 -0800 Subject: libbpf: Improve handling of failed CO-RE relocations Previously, if libbpf failed to resolve CO-RE relocation for some instructions, it would either return error immediately, or, if .relaxed_core_relocs option was set, would replace relocatable offset/imm part of an instruction with a bogus value (-1). Neither approach is good, because there are many possible scenarios where relocation is expected to fail (e.g., when some field knowingly can be missing on specific kernel versions). On the other hand, replacing offset with invalid one can hide programmer errors, if this relocation failue wasn't anticipated. This patch deprecates .relaxed_core_relocs option and changes the approach to always replacing instruction, for which relocation failed, with invalid BPF helper call instruction. For cases where this is expected, BPF program should already ensure that that instruction is unreachable, in which case this invalid instruction is going to be silently ignored. But if instruction wasn't guarded, BPF program will be rejected at verification step with verifier log pointing precisely to the place in assembly where the problem is. 
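Guarding typically looks like the following BPF-side sketch; the '___old' struct flavor and its field are hypothetical, while the macros are the usual CO-RE helpers from bpf_core_read.h:

    #include <linux/types.h>
    #include <bpf/bpf_core_read.h>

    /* hypothetical flavor describing a field newer kernels dropped */
    struct task_struct___old {
        int removed_field;
    } __attribute__((preserve_access_index));

    static int read_optional(void *task)
    {
        struct task_struct___old *t = task;
        int val = 0;

        /* if the field doesn't exist on this kernel, the branch is dead
         * code and the poisoned instruction in it is never verified */
        if (bpf_core_field_exists(t->removed_field))
            val = BPF_CORE_READ(t, removed_field);
        return val;
    }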
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20200124053837.2434679-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 95 +++++++++++++++++++++++++++++--------------------- tools/lib/bpf/libbpf.h | 6 +++- 2 files changed, 61 insertions(+), 40 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ae34b681ae82..39f1b7633a7c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -345,7 +345,6 @@ struct bpf_object { bool loaded; bool has_pseudo_calls; - bool relaxed_core_relocs; /* * Information when doing elf related work. Only valid if fd @@ -4238,25 +4237,38 @@ static int bpf_core_calc_field_relo(const struct bpf_program *prog, */ static int bpf_core_reloc_insn(struct bpf_program *prog, const struct bpf_field_reloc *relo, + int relo_idx, const struct bpf_core_spec *local_spec, const struct bpf_core_spec *targ_spec) { - bool failed = false, validate = true; __u32 orig_val, new_val; struct bpf_insn *insn; + bool validate = true; int insn_idx, err; __u8 class; if (relo->insn_off % sizeof(struct bpf_insn)) return -EINVAL; insn_idx = relo->insn_off / sizeof(struct bpf_insn); + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); if (relo->kind == BPF_FIELD_EXISTS) { orig_val = 1; /* can't generate EXISTS relo w/o local field */ new_val = targ_spec ? 1 : 0; } else if (!targ_spec) { - failed = true; - new_val = (__u32)-1; + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", + bpf_program__title(prog, false), relo_idx, insn_idx); + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with the following message: + * invalid func unknown#195896080 + */ + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ + return 0; } else { err = bpf_core_calc_field_relo(prog, relo, local_spec, &orig_val, &validate); @@ -4268,50 +4280,47 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, return err; } - insn = &prog->insns[insn_idx]; - class = BPF_CLASS(insn->code); - switch (class) { case BPF_ALU: case BPF_ALU64: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; - if (!failed && validate && insn->imm != orig_val) { - pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - insn->imm, orig_val, new_val); + if (validate && insn->imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, insn->imm, orig_val, new_val); return -EINVAL; } orig_val = insn->imm; insn->imm = new_val; - pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - failed ? 
" w/ failed reloc" : "", orig_val, new_val); + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); break; case BPF_LDX: case BPF_ST: case BPF_STX: - if (!failed && validate && insn->off != orig_val) { - pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - insn->off, orig_val, new_val); + if (validate && insn->off != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, insn->off, orig_val, new_val); return -EINVAL; } if (new_val > SHRT_MAX) { - pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n", - bpf_program__title(prog, false), insn_idx, - new_val); + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", + bpf_program__title(prog, false), relo_idx, + insn_idx, new_val); return -ERANGE; } orig_val = insn->off; insn->off = new_val; - pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n", - bpf_program__title(prog, false), insn_idx, - failed ? " w/ failed reloc" : "", orig_val, new_val); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", + bpf_program__title(prog, false), relo_idx, insn_idx, + orig_val, new_val); break; default: - pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", - bpf_program__title(prog, false), + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), relo_idx, insn_idx, insn->code, insn->src_reg, insn->dst_reg, insn->off, insn->imm); return -EINVAL; @@ -4510,24 +4519,33 @@ static int bpf_core_reloc_field(struct bpf_program *prog, } /* - * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is - * requested, it's expected that we might not find any candidates. - * In this case, if field wasn't found in any candidate, the list of - * candidates shouldn't change at all, we'll just handle relocating - * appropriately, depending on relo's kind. + * For BPF_FIELD_EXISTS relo or when used BPF program has field + * existence checks or kernel version/config checks, it's expected + * that we might not find any candidates. In this case, if field + * wasn't found in any candidate, the list of candidates shouldn't + * change at all, we'll just handle relocating appropriately, + * depending on relo's kind. */ if (j > 0) cand_ids->len = j; - if (j == 0 && !prog->obj->relaxed_core_relocs && - relo->kind != BPF_FIELD_EXISTS) { - pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); - return -ESRCH; - } + /* + * If no candidates were found, it might be both a programmer error, + * as well as expected case, depending whether instruction w/ + * relocation is guarded in some way that makes it unreachable (dead + * code) if relocation can't be resolved. This is handled in + * bpf_core_reloc_insn() uniformly by replacing that instruction with + * BPF helper call insn (using invalid helper ID). If that instruction + * is indeed unreachable, then it will be ignored and eliminated by + * verifier. If it was an error, then verifier will complain and point + * to a specific instruction number in its log. 
+ */ + if (j == 0) + pr_debug("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); /* bpf_core_reloc_insn should know how to handle missing targ_spec */ - err = bpf_core_reloc_insn(prog, relo, &local_spec, + err = bpf_core_reloc_insn(prog, relo, relo_idx, &local_spec, j ? &targ_spec : NULL); if (err) { pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", @@ -5057,7 +5075,6 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, if (IS_ERR(obj)) return obj; - obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); kconfig = OPTS_GET(opts, kconfig, NULL); if (kconfig) { obj->kconfig = strdup(kconfig); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 2a5e3b087002..3fe12c9d1f92 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -77,7 +77,11 @@ struct bpf_object_open_opts { const char *object_name; /* parse map definitions non-strictly, allowing extra attributes/data */ bool relaxed_maps; - /* process CO-RE relocations non-strictly, allowing them to fail */ + /* DEPRECATED: handle CO-RE relocations non-strictly, allowing failures. + * Value is ignored. Relocations always are processed non-strictly. + * Non-relocatable instructions are replaced with invalid ones to + * prevent accidental errors. + * */ bool relaxed_core_relocs; /* maps that set the 'pinning' attribute in their definition will have * their pin_path attribute set to a file in this directory, and be -- cgit v1.2.3 From 35b9211c0a2427e8f39e534f442f43804fc8d5ca Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 24 Jan 2020 12:18:46 -0800 Subject: libbpf: Fix realloc usage in bpf_core_find_cands Fix bug requesting invalid size of reallocated array when constructing CO-RE relocation candidate list. This can cause problems if there are many potential candidates and a very fine-grained memory allocator bucket sizes are used. Fixes: ddc7c3042614 ("libbpf: implement BPF CO-RE offset relocation algorithm") Reported-by: William Smith Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200124201847.212528-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 39f1b7633a7c..514b1a524abb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3869,7 +3869,9 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, if (strncmp(local_name, targ_name, local_essent_len) == 0) { pr_debug("[%d] %s: found candidate [%d] %s\n", local_type_id, local_name, i, targ_name); - new_ids = realloc(cand_ids->data, cand_ids->len + 1); + new_ids = reallocarray(cand_ids->data, + cand_ids->len + 1, + sizeof(*cand_ids->data)); if (!new_ids) { err = -ENOMEM; goto err_out; -- cgit v1.2.3 From 84ad7a7ab69f112c0c4b878c9be91b950a1fb1f8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 23 Jan 2020 17:15:06 +0100 Subject: bpf: Allow BTF ctx access for string pointers When accessing the context we allow access to arguments with scalar type and pointer to struct. But we deny access for pointer to scalar type, which is the case for many functions. Alexei suggested to take conservative approach and allow currently only string pointer access, which is the case for most functions now: Adding check if the pointer is to string type and allow access to it. 
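For example, a tracing program can now take a string argument straight from the typed context; a sketch mirroring the selftest programs elsewhere in this series (the traced function and BPF_PROG helper header match theirs):

    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>
    #include "bpf_trace_helpers.h"

    struct task_struct;

    SEC("fentry/__set_task_comm")
    int BPF_PROG(comm_probe, struct task_struct *tsk, const char *buf, bool exec)
    {
        char comm[16] = {};

        /* 'buf' is a pointer to char in the BTF function proto, so
         * btf_ctx_access() now lets the program load and use it */
        bpf_probe_read_str(comm, sizeof(comm), buf);
        return 0;
    }

    char _license[] SEC("license") = "GPL";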
Suggested-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200123161508.915203-2-jolsa@kernel.org --- kernel/bpf/btf.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 32963b6d5a9c..b7c1660fb594 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3669,6 +3669,19 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) } } +static bool is_string_ptr(struct btf *btf, const struct btf_type *t) +{ + /* t comes in already as a pointer */ + t = btf_type_by_id(btf, t->type); + + /* allow const */ + if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST) + t = btf_type_by_id(btf, t->type); + + /* char, signed char, unsigned char */ + return btf_type_is_int(t) && t->size == 1; +} + bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) @@ -3735,6 +3748,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, */ return true; + if (is_string_ptr(btf, t)) + return true; + /* this is a pointer to another type */ info->reg_type = PTR_TO_BTF_ID; -- cgit v1.2.3 From e9b4e606c2289d6610113253922bb8c9ac7f68b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 23 Jan 2020 17:15:07 +0100 Subject: bpf: Allow to resolve bpf trampoline and dispatcher in unwind When unwinding the stack we need to identify each address to successfully continue. Adding latch tree to keep trampolines for quick lookup during the unwind. The patch uses first 48 bytes for latch tree node, leaving 4048 bytes from the rest of the page for trampoline or dispatcher generated code. It's still enough not to affect trampoline and dispatcher progs maximum counts. Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200123161508.915203-3-jolsa@kernel.org --- include/linux/bpf.h | 12 +++++++- kernel/bpf/dispatcher.c | 4 +-- kernel/bpf/trampoline.c | 80 ++++++++++++++++++++++++++++++++++++++++++++----- kernel/extable.c | 7 +++-- 4 files changed, 90 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a9687861fd7e..8e9ad3943cd9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -525,7 +525,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key); int bpf_trampoline_link_prog(struct bpf_prog *prog); int bpf_trampoline_unlink_prog(struct bpf_prog *prog); void bpf_trampoline_put(struct bpf_trampoline *tr); -void *bpf_jit_alloc_exec_page(void); #define BPF_DISPATCHER_INIT(name) { \ .mutex = __MUTEX_INITIALIZER(name.mutex), \ .func = &name##func, \ @@ -557,6 +556,13 @@ void *bpf_jit_alloc_exec_page(void); #define BPF_DISPATCHER_PTR(name) (&name) void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); +struct bpf_image { + struct latch_tree_node tnode; + unsigned char data[]; +}; +#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image)) +bool is_bpf_image_address(unsigned long address); +void *bpf_image_alloc(void); #else static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { @@ -578,6 +584,10 @@ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to) {} +static inline bool is_bpf_image_address(unsigned long address) +{ + return false; +} #endif struct bpf_func_info_aux { diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 
204ee61a3904..b3e5b214fed8 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -113,7 +113,7 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) noff = 0; } else { old = d->image + d->image_off; - noff = d->image_off ^ (PAGE_SIZE / 2); + noff = d->image_off ^ (BPF_IMAGE_SIZE / 2); } new = d->num_progs ? d->image + noff : NULL; @@ -140,7 +140,7 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, mutex_lock(&d->mutex); if (!d->image) { - d->image = bpf_jit_alloc_exec_page(); + d->image = bpf_image_alloc(); if (!d->image) goto out; } diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index eb64c245052b..6b264a92064b 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -4,6 +4,7 @@ #include #include #include +#include /* dummy _ops. The verifier will operate on target program's ops. */ const struct bpf_verifier_ops bpf_extension_verifier_ops = { @@ -16,11 +17,12 @@ const struct bpf_prog_ops bpf_extension_prog_ops = { #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS) static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; +static struct latch_tree_root image_tree __cacheline_aligned; -/* serializes access to trampoline_table */ +/* serializes access to trampoline_table and image_tree */ static DEFINE_MUTEX(trampoline_mutex); -void *bpf_jit_alloc_exec_page(void) +static void *bpf_jit_alloc_exec_page(void) { void *image; @@ -36,6 +38,64 @@ void *bpf_jit_alloc_exec_page(void) return image; } +static __always_inline bool image_tree_less(struct latch_tree_node *a, + struct latch_tree_node *b) +{ + struct bpf_image *ia = container_of(a, struct bpf_image, tnode); + struct bpf_image *ib = container_of(b, struct bpf_image, tnode); + + return ia < ib; +} + +static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n) +{ + void *image = container_of(n, struct bpf_image, tnode); + + if (addr < image) + return -1; + if (addr >= image + PAGE_SIZE) + return 1; + + return 0; +} + +static const struct latch_tree_ops image_tree_ops = { + .less = image_tree_less, + .comp = image_tree_comp, +}; + +static void *__bpf_image_alloc(bool lock) +{ + struct bpf_image *image; + + image = bpf_jit_alloc_exec_page(); + if (!image) + return NULL; + + if (lock) + mutex_lock(&trampoline_mutex); + latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops); + if (lock) + mutex_unlock(&trampoline_mutex); + return image->data; +} + +void *bpf_image_alloc(void) +{ + return __bpf_image_alloc(true); +} + +bool is_bpf_image_address(unsigned long addr) +{ + bool ret; + + rcu_read_lock(); + ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL; + rcu_read_unlock(); + + return ret; +} + struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { struct bpf_trampoline *tr; @@ -56,7 +116,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key) goto out; /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ - image = bpf_jit_alloc_exec_page(); + image = __bpf_image_alloc(false); if (!image) { kfree(tr); tr = NULL; @@ -131,14 +191,14 @@ static int register_fentry(struct bpf_trampoline *tr, void *new_addr) } /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 - * bytes on x86. Pick a number to fit into PAGE_SIZE / 2 + * bytes on x86. 
Pick a number to fit into BPF_IMAGE_SIZE / 2 */ #define BPF_MAX_TRAMP_PROGS 40 static int bpf_trampoline_update(struct bpf_trampoline *tr) { - void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; - void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; + void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2; + void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2; struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS]; int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY]; int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT]; @@ -174,7 +234,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) */ synchronize_rcu_tasks(); - err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2, + err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2, &tr->func.model, flags, fentry, fentry_cnt, fexit, fexit_cnt, @@ -284,6 +344,8 @@ out: void bpf_trampoline_put(struct bpf_trampoline *tr) { + struct bpf_image *image; + if (!tr) return; mutex_lock(&trampoline_mutex); @@ -294,9 +356,11 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) goto out; if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) goto out; + image = container_of(tr->image, struct bpf_image, data); + latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops); /* wait for tasks to get out of trampoline before freeing it */ synchronize_rcu_tasks(); - bpf_jit_free_exec(tr->image); + bpf_jit_free_exec(image); hlist_del(&tr->hlist); kfree(tr); out: diff --git a/kernel/extable.c b/kernel/extable.c index f6920a11e28a..a0024f27d3a1 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -131,8 +131,9 @@ int kernel_text_address(unsigned long addr) * triggers a stack trace, or a WARN() that happens during * coming back from idle, or cpu on or offlining. * - * is_module_text_address() as well as the kprobe slots - * and is_bpf_text_address() require RCU to be watching. + * is_module_text_address() as well as the kprobe slots, + * is_bpf_text_address() and is_bpf_image_address require + * RCU to be watching. */ no_rcu = !rcu_is_watching(); @@ -148,6 +149,8 @@ int kernel_text_address(unsigned long addr) goto out; if (is_bpf_text_address(addr)) goto out; + if (is_bpf_image_address(addr)) + goto out; ret = 0; out: if (no_rcu) -- cgit v1.2.3 From d633d57902a510debd4ec5b7a374a009c8c2d620 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 23 Jan 2020 17:15:08 +0100 Subject: selftest/bpf: Add test for allowed trampolines count There's limit of 40 programs tht can be attached to trampoline for one function. Adding test that tries to attach that many plus one extra that needs to fail. 
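The kernel-side check being exercised amounts to the following schematic sketch (simplified, not the exact function body):

    #include <errno.h>

    #define BPF_MAX_TRAMP_PROGS 40

    /* sketch: the trampoline refuses the 41st program with -E2BIG,
     * which is exactly what the test below asserts on */
    static int link_prog(int *progs_cnt)
    {
        if (*progs_cnt >= BPF_MAX_TRAMP_PROGS)
            return -E2BIG;
        (*progs_cnt)++;
        return 0;
    }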
Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200123161508.915203-4-jolsa@kernel.org --- .../selftests/bpf/prog_tests/trampoline_count.c | 112 +++++++++++++++++++++ .../selftests/bpf/progs/test_trampoline_count.c | 21 ++++ 2 files changed, 133 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/trampoline_count.c create mode 100644 tools/testing/selftests/bpf/progs/test_trampoline_count.c diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c new file mode 100644 index 000000000000..1235f3d1cc50 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE +#include +#include +#include + +#define MAX_TRAMP_PROGS 40 + +struct inst { + struct bpf_object *obj; + struct bpf_link *link_fentry; + struct bpf_link *link_fexit; +}; + +static int test_task_rename(void) +{ + int fd, duration = 0, err; + char buf[] = "test_overhead"; + + fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (CHECK(fd < 0, "open /proc", "err %d", errno)) + return -1; + err = write(fd, buf, sizeof(buf)); + if (err < 0) { + CHECK(err < 0, "task rename", "err %d", errno); + close(fd); + return -1; + } + close(fd); + return 0; +} + +static struct bpf_link *load(struct bpf_object *obj, const char *name) +{ + struct bpf_program *prog; + int duration = 0; + + prog = bpf_object__find_program_by_title(obj, name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) + return ERR_PTR(-EINVAL); + return bpf_program__attach_trace(prog); +} + +void test_trampoline_count(void) +{ + const char *fentry_name = "fentry/__set_task_comm"; + const char *fexit_name = "fexit/__set_task_comm"; + const char *object = "test_trampoline_count.o"; + struct inst inst[MAX_TRAMP_PROGS] = { 0 }; + int err, i = 0, duration = 0; + struct bpf_object *obj; + struct bpf_link *link; + char comm[16] = {}; + + /* attach 'allowed' 40 trampoline programs */ + for (i = 0; i < MAX_TRAMP_PROGS; i++) { + obj = bpf_object__open_file(object, NULL); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + goto cleanup; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup; + inst[i].obj = obj; + + if (rand() % 2) { + link = load(obj, fentry_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + goto cleanup; + inst[i].link_fentry = link; + } else { + link = load(obj, fexit_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + goto cleanup; + inst[i].link_fexit = link; + } + } + + /* and try 1 extra.. 
*/ + obj = bpf_object__open_file(object, NULL); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + goto cleanup; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup_extra; + + /* ..that needs to fail */ + link = load(obj, fentry_name); + if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) { + bpf_link__destroy(link); + goto cleanup_extra; + } + + /* with E2BIG error */ + CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link)); + + /* and finaly execute the probe */ + if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) + goto cleanup_extra; + CHECK_FAIL(test_task_rename()); + CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L)); + +cleanup_extra: + bpf_object__close(obj); +cleanup: + while (--i) { + bpf_link__destroy(inst[i].link_fentry); + bpf_link__destroy(inst[i].link_fexit); + bpf_object__close(inst[i].obj); + } +} diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c new file mode 100644 index 000000000000..e51e6e3a81c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include "bpf_trace_helpers.h" + +struct task_struct; + +SEC("fentry/__set_task_comm") +int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec) +{ + return 0; +} + +SEC("fexit/__set_task_comm") +int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 07fdbee134b3530ef30c709aa4251ca04ea9e3f8 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Fri, 24 Jan 2020 14:41:42 -0800 Subject: tools/bpf: Allow overriding llvm tools for runqslower tools/testing/selftests/bpf/Makefile supports overriding clang, llc and other tools so that custom ones can be used instead of those from PATH. It's convinient and heavily used by some users. Apply same rules to runqslower/Makefile. Signed-off-by: Andrey Ignatov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200124224142.1833678-1-rdna@fb.com --- tools/bpf/runqslower/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index faf5418609ea..0c021352beed 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) OUTPUT := .output -CLANG := clang -LLC := llc -LLVM_STRIP := llvm-strip +CLANG ?= clang +LLC ?= llc +LLVM_STRIP ?= llvm-strip DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) LIBBPF_SRC := $(abspath ../../lib/bpf) -- cgit v1.2.3 From 90435a7891a2259b0f74c5a1bc5600d0d64cba8f Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sat, 25 Jan 2020 12:10:02 +0300 Subject: bpf: map_seq_next should always increase position index If seq_file .next fuction does not change position index, read after some lseek can generate an unexpected output. 
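The invariant is that .next must advance the position index unconditionally, even when iteration is finished; schematically (the helper names are hypothetical):

    #include <linux/seq_file.h>

    /* sketch: advance *pos before any early return, so a subsequent
     * lseek()+read() cannot replay the last element */
    static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos)
    {
        (*pos)++;                    /* always increment, even at end */
        if (iteration_done(m))       /* hypothetical end-of-map check */
            return NULL;
        return next_key(m);          /* hypothetical key lookup */
    }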
See also: https://bugzilla.kernel.org/show_bug.cgi?id=206283 v1 -> v2: removed missed increment in end of function Signed-off-by: Vasily Averin Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/eca84fdd-c374-a154-d874-6c7b55fc3bc4@virtuozzo.com --- kernel/bpf/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index e11059b99f18..bd2fd8eab470 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -196,6 +196,7 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) void *key = map_iter(m)->key; void *prev_key; + (*pos)++; if (map_iter(m)->done) return NULL; @@ -208,8 +209,6 @@ static void *map_seq_next(struct seq_file *m, void *v, loff_t *pos) map_iter(m)->done = true; return NULL; } - - ++(*pos); return key; } -- cgit v1.2.3 From 42a84a8cd0ff0cbff5a4595e1304c4567a30267d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 26 Jan 2020 16:14:00 -0800 Subject: bpf, xdp: Update devmap comments to reflect napi/rcu usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we rely on synchronize_rcu and call_rcu waiting to exit perempt-disable regions (NAPI) lets update the comments to reflect this. Fixes: 0536b85239b84 ("xdp: Simplify devmap cleanup") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Acked-by: Song Liu Link: https://lore.kernel.org/bpf/1580084042-11598-2-git-send-email-john.fastabend@gmail.com --- kernel/bpf/devmap.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index f3a44f6ca86e..373c6a30d8a5 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -190,10 +190,12 @@ static void dev_map_free(struct bpf_map *map) /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, * so the programs (can be more than one that used this map) were - * disconnected from events. Wait for outstanding critical sections in - * these programs to complete. The rcu critical section only guarantees - * no further reads against netdev_map. It does __not__ ensure pending - * flush operations (if any) are complete. + * disconnected from events. The following synchronize_rcu() guarantees + * both rcu read critical sections complete and waits for + * preempt-disable regions (NAPI being the relevant context here) so we + * are certain there will be no further reads against the netdev_map and + * all flush operations are complete. Flush operations can only be done + * from NAPI context for this reason. */ spin_lock(&dev_map_lock); @@ -503,12 +505,11 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return -EINVAL; /* Use call_rcu() here to ensure any rcu critical sections have - * completed, but this does not guarantee a flush has happened - * yet. Because driver side rcu_read_lock/unlock only protects the - * running XDP program. However, for pending flush operations the - * dev and ctx are stored in another per cpu map. And additionally, - * the driver tear down ensures all soft irqs are complete before - * removing the net device in the case of dev_put equals zero. + * completed as well as any flush operations because call_rcu + * will wait for preempt-disable region to complete, NAPI in this + * context. And additionally, the driver tear down ensures all + * soft irqs are complete before removing the net device in the + * case of dev_put equals zero. 
*/ old_dev = xchg(&dtab->netdev_map[k], NULL); if (old_dev) -- cgit v1.2.3 From 9719c6b98db42462bfb9aa172f017bdfd6d66c6f Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 26 Jan 2020 16:14:01 -0800 Subject: bpf, xdp: virtio_net use access ptr macro for xdp enable check virtio_net currently relies on rcu critical section to access the xdp program in its xdp_xmit handler. However, the pointer to the xdp program is only used to do a NULL pointer comparison to determine if xdp is enabled or not. Use rcu_access_pointer() instead of rcu_dereference() to reflect this. Then later when we drop rcu_read critical section virtio_net will not need in special handling. Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/1580084042-11598-3-git-send-email-john.fastabend@gmail.com --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c458cd313281..2fe7a3188282 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -501,7 +501,7 @@ static int virtnet_xdp_xmit(struct net_device *dev, /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ - xdp_prog = rcu_dereference(rq->xdp_prog); + xdp_prog = rcu_access_pointer(rq->xdp_prog); if (!xdp_prog) return -ENXIO; -- cgit v1.2.3 From b23bfa5633b19bf1db87b36a76b2225c734f794c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sun, 26 Jan 2020 16:14:02 -0800 Subject: bpf, xdp: Remove no longer required rcu_read_{un}lock() Now that we depend on rcu_call() and synchronize_rcu() to also wait for preempt_disabled region to complete the rcu read critical section in __dev_map_flush() is no longer required. Except in a few special cases in drivers that need it for other reasons. These originally ensured the map reference was safe while a map was also being free'd. And additionally that bpf program updates via ndo_bpf did not happen while flush updates were in flight. But flush by new rules can only be called from preempt-disabled NAPI context. The synchronize_rcu from the map free path and the rcu_call from the delete path will ensure the reference there is safe. So lets remove the rcu_read_lock and rcu_read_unlock pair to avoid any confusion around how this is being protected. If the rcu_read_lock was required it would mean errors in the above logic and the original patch would also be wrong. Now that we have done above we put the rcu_read_lock in the driver code where it is needed in a driver dependent way. I think this helps readability of the code so we know where and why we are taking read locks. Most drivers will not need rcu_read_locks here and further XDP drivers already have rcu_read_locks in their code paths for reading xdp programs on RX side so this makes it symmetric where we don't have half of rcu critical sections define in driver and the other half in devmap. 
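The resulting driver-side shape is symmetric with the RX path; a sketch modeled on the veth change below ('do_xmit' is a placeholder for the driver's real transmit work):

    #include <linux/rcupdate.h>
    #include <linux/netdevice.h>
    #include <net/xdp.h>

    /* sketch: the driver takes the read lock around its own xmit work
     * instead of relying on devmap's flush path to hold it */
    static int xdp_xmit(struct net_device *dev, int n,
                        struct xdp_frame **frames, u32 flags)
    {
        int ret;

        rcu_read_lock();
        ret = do_xmit(dev, n, frames, flags);   /* placeholder */
        rcu_read_unlock();
        return ret;
    }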
Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/1580084042-11598-4-git-send-email-john.fastabend@gmail.com --- drivers/net/veth.c | 6 +++++- kernel/bpf/devmap.c | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1c89017beebb..8cdc4415fa70 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -377,6 +377,7 @@ static int veth_xdp_xmit(struct net_device *dev, int n, unsigned int max_len; struct veth_rq *rq; + rcu_read_lock(); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { ret = -EINVAL; goto drop; @@ -418,11 +419,14 @@ static int veth_xdp_xmit(struct net_device *dev, int n, if (flags & XDP_XMIT_FLUSH) __veth_xdp_flush(rq); - if (likely(!drops)) + if (likely(!drops)) { + rcu_read_unlock(); return n; + } ret = n - drops; drop: + rcu_read_unlock(); atomic64_add(drops, &priv->dropped); return ret; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 373c6a30d8a5..58bdca5d978a 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -366,16 +366,17 @@ error: * from NET_RX_SOFTIRQ. Either way the poll routine must complete before the * net device can be torn down. On devmap tear down we ensure the flush list * is empty before completing to ensure all flush operations have completed. + * When drivers update the bpf program they may need to ensure any flush ops + * are also complete. Using synchronize_rcu or call_rcu will suffice for this + * because both wait for napi context to exit. */ void __dev_flush(void) { struct list_head *flush_list = this_cpu_ptr(&dev_flush_list); struct xdp_dev_bulk_queue *bq, *tmp; - rcu_read_lock(); list_for_each_entry_safe(bq, tmp, flush_list, flush_node) bq_xmit_all(bq, XDP_XMIT_FLUSH); - rcu_read_unlock(); } /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or -- cgit v1.2.3 From 59fb9b62fb6c929a756563152a89f39b07cf8893 Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Fri, 17 Jan 2020 16:05:32 +0900 Subject: flow_dissector: Fix to use new variables for port ranges in bpf hook This patch applies new flag (FLOW_DISSECTOR_KEY_PORTS_RANGE) and field (tp_range) to BPF flow dissector to generate appropriate flow keys when classified by specified port ranges. 
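The separate range key exists because a range match compares the dissected port against inclusive min/max bounds rather than an exact masked value; an illustrative consumer-side sketch of that comparison (mirroring what cls_flower does with its tp_range key, here in userspace form):

    #include <arpa/inet.h>
    #include <stdbool.h>
    #include <linux/types.h>

    /* sketch: range classification of a dissected port against
     * tp_range-style min/max bounds, compared in host byte order */
    static bool port_in_range(__be16 port, __be16 min, __be16 max)
    {
        return ntohs(port) >= ntohs(min) && ntohs(port) <= ntohs(max);
    }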
Fixes: 8ffb055beae5 ("cls_flower: Fix the behavior using port ranges with hw-offload") Signed-off-by: Yoshiki Komachi Signed-off-by: Daniel Borkmann Acked-by: Petar Penkov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200117070533.402240-2-komachi.yoshiki@gmail.com --- net/core/flow_dissector.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f560b4902060..a1670dff0629 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -834,10 +834,10 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { + struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; - struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, @@ -876,10 +876,17 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); + else if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + + if (key_ports) { key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } -- cgit v1.2.3 From 265bb359061db6ef825dec3912f341a604966371 Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Fri, 17 Jan 2020 16:05:33 +0900 Subject: selftests/bpf: Add test based on port range for BPF flow dissector Add a simple test to make sure that a filter based on specified port range classifies packets correctly. Signed-off-by: Yoshiki Komachi Signed-off-by: Daniel Borkmann Acked-by: Petar Penkov Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200117070533.402240-3-komachi.yoshiki@gmail.com --- tools/testing/selftests/bpf/test_flow_dissector.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index a8485ae103d1..174b72a64a4c 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -139,6 +139,20 @@ echo "Testing IPv4 + GRE..." tc filter del dev lo ingress pref 1337 +echo "Testing port range..." +# Drops all IP/UDP packets coming from port 8-10 +tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ + udp src_port 8-10 action drop + +# Send 10 IPv4/UDP packets from port 7. Filter should not drop any. +./test_flow_dissector -i 4 -f 7 +# Send 10 IPv4/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 4 -f 9 -F +# Send 10 IPv4/UDP packets from port 11. Filter should not drop any. +./test_flow_dissector -i 4 -f 11 + +tc filter del dev lo ingress pref 1337 + echo "Testing IPv6..." # Drops all IPv6/UDP packets coming from port 9 tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ -- cgit v1.2.3