summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/skmsg.h8
-rw-r--r--include/net/tls.h9
-rw-r--r--net/core/skmsg.c98
-rw-r--r--net/tls/tls_sw.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h46
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c163
6 files changed, 296 insertions, 48 deletions
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index ad31c9fb7158..08674cd14d5a 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
psock_set_prog(&progs->skb_verdict, NULL);
}
+int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
+
+static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
+{
+ if (!psock)
+ return false;
+ return psock->parser.enabled;
+}
#endif /* _LINUX_SKMSG_H */
diff --git a/include/net/tls.h b/include/net/tls.h
index 3e7b44cae0d9..3212d3c214a9 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -571,6 +571,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
return !!tls_sw_ctx_tx(ctx);
}
+static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+
+ if (!ctx)
+ return false;
+ return !!tls_sw_ctx_rx(ctx);
+}
+
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index c479372f2cd2..351afbf6bfba 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -7,6 +7,7 @@
#include <net/sock.h>
#include <net/tcp.h>
+#include <net/tls.h>
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
@@ -682,13 +683,75 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
return container_of(parser, struct sk_psock, parser);
}
-static void sk_psock_verdict_apply(struct sk_psock *psock,
- struct sk_buff *skb, int verdict)
+static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
{
struct sk_psock *psock_other;
struct sock *sk_other;
bool ingress;
+ sk_other = tcp_skb_bpf_redirect_fetch(skb);
+ if (unlikely(!sk_other)) {
+ kfree_skb(skb);
+ return;
+ }
+ psock_other = sk_psock(sk_other);
+ if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
+ !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ ingress = tcp_skb_bpf_ingress(skb);
+ if ((!ingress && sock_writeable(sk_other)) ||
+ (ingress &&
+ atomic_read(&sk_other->sk_rmem_alloc) <=
+ sk_other->sk_rcvbuf)) {
+ if (!ingress)
+ skb_set_owner_w(skb, sk_other);
+ skb_queue_tail(&psock_other->ingress_skb, skb);
+ schedule_work(&psock_other->work);
+ } else {
+ kfree_skb(skb);
+ }
+}
+
+static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
+ struct sk_buff *skb, int verdict)
+{
+ switch (verdict) {
+ case __SK_REDIRECT:
+ sk_psock_skb_redirect(psock, skb);
+ break;
+ case __SK_PASS:
+ case __SK_DROP:
+ default:
+ break;
+ }
+}
+
+int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
+{
+ struct bpf_prog *prog;
+ int ret = __SK_PASS;
+
+ rcu_read_lock();
+ prog = READ_ONCE(psock->progs.skb_verdict);
+ if (likely(prog)) {
+ tcp_skb_bpf_redirect_clear(skb);
+ ret = sk_psock_bpf_run(psock, prog, skb);
+ ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+ }
+ rcu_read_unlock();
+ sk_psock_tls_verdict_apply(psock, skb, ret);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
+
+static void sk_psock_verdict_apply(struct sk_psock *psock,
+ struct sk_buff *skb, int verdict)
+{
+ struct sock *sk_other;
+
switch (verdict) {
case __SK_PASS:
sk_other = psock->sk;
@@ -707,25 +770,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
}
goto out_free;
case __SK_REDIRECT:
- sk_other = tcp_skb_bpf_redirect_fetch(skb);
- if (unlikely(!sk_other))
- goto out_free;
- psock_other = sk_psock(sk_other);
- if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
- !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
- goto out_free;
- ingress = tcp_skb_bpf_ingress(skb);
- if ((!ingress && sock_writeable(sk_other)) ||
- (ingress &&
- atomic_read(&sk_other->sk_rmem_alloc) <=
- sk_other->sk_rcvbuf)) {
- if (!ingress)
- skb_set_owner_w(skb, sk_other);
- skb_queue_tail(&psock_other->ingress_skb, skb);
- schedule_work(&psock_other->work);
- break;
- }
- /* fall-through */
+ sk_psock_skb_redirect(psock, skb);
+ break;
case __SK_DROP:
/* fall-through */
default:
@@ -779,9 +825,13 @@ static void sk_psock_strp_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
if (likely(psock)) {
- write_lock_bh(&sk->sk_callback_lock);
- strp_data_ready(&psock->parser.strp);
- write_unlock_bh(&sk->sk_callback_lock);
+ if (tls_sw_has_ctx_rx(sk)) {
+ psock->parser.saved_data_ready(sk);
+ } else {
+ write_lock_bh(&sk->sk_callback_lock);
+ strp_data_ready(&psock->parser.strp);
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
}
rcu_read_unlock();
}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 8c2763eb6aae..24f64bc0de18 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1742,6 +1742,7 @@ int tls_sw_recvmsg(struct sock *sk,
long timeo;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
+ bool bpf_strp_enabled;
int num_async = 0;
int pending;
@@ -1752,6 +1753,7 @@ int tls_sw_recvmsg(struct sock *sk,
psock = sk_psock_get(sk);
lock_sock(sk);
+ bpf_strp_enabled = sk_psock_strp_enabled(psock);
/* Process pending decrypted records. It must be non-zero-copy */
err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
@@ -1805,11 +1807,12 @@ int tls_sw_recvmsg(struct sock *sk,
if (to_decrypt <= len && !is_kvec && !is_peek &&
ctx->control == TLS_RECORD_TYPE_DATA &&
- prot->version != TLS_1_3_VERSION)
+ prot->version != TLS_1_3_VERSION &&
+ !bpf_strp_enabled)
zc = true;
/* Do not use async mode if record is non-data */
- if (ctx->control == TLS_RECORD_TYPE_DATA)
+ if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
async_capable = ctx->async_capable;
else
async_capable = false;
@@ -1859,6 +1862,19 @@ int tls_sw_recvmsg(struct sock *sk,
goto pick_next_record;
if (!zc) {
+ if (bpf_strp_enabled) {
+ err = sk_psock_tls_strp_read(psock, skb);
+ if (err != __SK_PASS) {
+ rxm->offset = rxm->offset + rxm->full_len;
+ rxm->full_len = 0;
+ if (err == __SK_DROP)
+ consume_skb(skb);
+ ctx->recv_pkt = NULL;
+ __strp_unpause(&ctx->strp);
+ continue;
+ }
+ }
+
if (rxm->full_len > len) {
retain_skb = true;
chunk = len;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index a443d3637db3..057036ca1111 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -79,11 +79,18 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, 2);
__type(key, int);
__type(value, int);
} sock_skb_opts SEC(".maps");
+struct {
+ __uint(type, TEST_MAP_TYPE);
+ __uint(max_entries, 20);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} tls_sock_map SEC(".maps");
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -118,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb)
}
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+ const int one = 1;
+ int err, *f, ret = SK_PASS;
+ void *data_end;
+ char *c;
+
+ err = bpf_skb_pull_data(skb, 19);
+ if (err)
+ goto tls_out;
+
+ c = (char *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+
+ if (c + 18 < data_end)
+ memcpy(&c[13], "PASS", 4);
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f) {
+ __u64 flags = 0;
+
+ ret = 0;
+ flags = *f;
+#ifdef SOCKMAP
+ return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
+#else
+ return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
+#endif
+ }
+
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f)
+ ret = SK_DROP;
+tls_out:
+ return ret;
+}
+
SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index c80643828b82..37695fc8096a 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -63,8 +63,8 @@ int s1, s2, c1, c2, p1, p2;
int test_cnt;
int passed;
int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
int prog_fd[11];
int txmsg_pass;
@@ -79,7 +79,10 @@ int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress;
-int txmsg_skb;
+int txmsg_redir_skb;
+int txmsg_ktls_skb;
+int txmsg_ktls_skb_drop;
+int txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
@@ -104,7 +107,7 @@ static const struct option long_options[] = {
{"txmsg_start_pop", required_argument, NULL, 'w'},
{"txmsg_pop", required_argument, NULL, 'x'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
- {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
{"ktls", no_argument, &ktls, 1 },
{"peek", no_argument, &peek_flag, 1 },
{"whitelist", required_argument, NULL, 'n' },
@@ -169,7 +172,8 @@ static void test_reset(void)
txmsg_start_push = txmsg_end_push = 0;
txmsg_pass = txmsg_drop = txmsg_redir = 0;
txmsg_apply = txmsg_cork = 0;
- txmsg_ingress = txmsg_skb = 0;
+ txmsg_ingress = txmsg_redir_skb = 0;
+ txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
}
static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
@@ -502,14 +506,41 @@ unwind_iov:
static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
{
- int i, j, bytes_cnt = 0;
+ int i, j = 0, bytes_cnt = 0;
unsigned char k = 0;
for (i = 0; i < msg->msg_iovlen; i++) {
unsigned char *d = msg->msg_iov[i].iov_base;
- for (j = 0;
- j < msg->msg_iov[i].iov_len && size; j++) {
+ /* Special case test for skb ingress + ktls */
+ if (i == 0 && txmsg_ktls_skb) {
+ if (msg->msg_iov[i].iov_len < 4)
+ return -EIO;
+ if (txmsg_ktls_skb_redir) {
+ if (memcmp(&d[13], "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
+ return -EIO;
+ }
+ d[13] = 0;
+ d[14] = 1;
+ d[15] = 2;
+ d[16] = 3;
+ j = 13;
+ } else if (txmsg_ktls_skb) {
+ if (memcmp(d, "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
+ return -EIO;
+ }
+ d[0] = 0;
+ d[1] = 1;
+ d[2] = 2;
+ d[3] = 3;
+ }
+ }
+
+ for (; j < msg->msg_iov[i].iov_len && size; j++) {
if (d[j] != k++) {
fprintf(stderr,
"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -724,7 +755,7 @@ static int sendmsg_test(struct sockmap_options *opt)
rxpid = fork();
if (rxpid == 0) {
iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
- if (opt->drop_expected)
+ if (opt->drop_expected || txmsg_ktls_skb_drop)
_exit(0);
if (!iov_buf) /* zero bytes sent case */
@@ -911,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
return err;
}
+ /* Attach programs to TLS sockmap */
+ if (txmsg_ktls_skb) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[8],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[8], err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_prog_attach(prog_fd[2], map_fd[8],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
/* Attach to cgroups */
- err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
err, strerror(errno));
@@ -928,15 +979,15 @@ run:
/* Attach txmsg program to sockmap */
if (txmsg_pass)
- tx_prog_fd = prog_fd[3];
- else if (txmsg_redir)
tx_prog_fd = prog_fd[4];
- else if (txmsg_apply)
+ else if (txmsg_redir)
tx_prog_fd = prog_fd[5];
- else if (txmsg_cork)
+ else if (txmsg_apply)
tx_prog_fd = prog_fd[6];
- else if (txmsg_drop)
+ else if (txmsg_cork)
tx_prog_fd = prog_fd[7];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[8];
else
tx_prog_fd = 0;
@@ -1108,7 +1159,35 @@ run:
}
}
- if (txmsg_skb) {
+ if (txmsg_ktls_skb) {
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ if (txmsg_ktls_skb_redir) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7],
+ &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_ktls_skb_drop) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
+ }
+ }
+
+ if (txmsg_redir_skb) {
int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
p2 : p1;
int ingress = BPF_F_INGRESS;
@@ -1123,8 +1202,7 @@ run:
}
i = 3;
- err = bpf_map_update_elem(map_fd[0],
- &i, &skb_fd, BPF_ANY);
+ err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -1158,9 +1236,12 @@ run:
fprintf(stderr, "unknown test\n");
out:
/* Detatch and zero all the maps */
- bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+ bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
+ bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+
if (tx_prog_fd >= 0)
bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
@@ -1229,8 +1310,10 @@ static void test_options(char *options)
}
if (txmsg_ingress)
strncat(options, "ingress,", OPTSTRING);
- if (txmsg_skb)
- strncat(options, "skb,", OPTSTRING);
+ if (txmsg_redir_skb)
+ strncat(options, "redir_skb,", OPTSTRING);
+ if (txmsg_ktls_skb)
+ strncat(options, "ktls_skb,", OPTSTRING);
if (ktls)
strncat(options, "ktls,", OPTSTRING);
if (peek_flag)
@@ -1362,6 +1445,40 @@ static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
test_send(opt, cgrp);
}
+static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
+{
+ bool data = opt->data_test;
+ int k = ktls;
+
+ opt->data_test = true;
+ ktls = 1;
+
+ txmsg_pass = txmsg_drop = 0;
+ txmsg_ingress = txmsg_redir = 0;
+ txmsg_ktls_skb = 1;
+ txmsg_pass = 1;
+
+ /* Using data verification so ensure iov layout is
+ * expected from test receiver side. e.g. has enough
+ * bytes to write test code.
+ */
+ opt->iov_length = 100;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 1;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 0;
+ txmsg_ktls_skb_redir = 1;
+ test_exec(cgrp, opt);
+
+ opt->data_test = data;
+ ktls = k;
+}
+
+
/* Test cork with hung data. This tests poor usage patterns where
* cork can leave data on the ring if user program is buggy and
* doesn't flush them somehow. They do take some time however
@@ -1542,11 +1659,13 @@ char *map_names[] = {
"sock_bytes",
"sock_redir_flags",
"sock_skb_opts",
+ "tls_sock_map",
};
int prog_attach_type[] = {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_SOCK_OPS,
BPF_SK_MSG_VERDICT,
BPF_SK_MSG_VERDICT,
@@ -1560,6 +1679,7 @@ int prog_attach_type[] = {
int prog_type[] = {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_SK_MSG,
@@ -1620,6 +1740,7 @@ struct _test test[] = {
{"txmsg test redirect", test_txmsg_redir},
{"txmsg test drop", test_txmsg_drop},
{"txmsg test ingress redirect", test_txmsg_ingress_redir},
+ {"txmsg test skb", test_txmsg_skb},
{"txmsg test apply", test_txmsg_apply},
{"txmsg test cork", test_txmsg_cork},
{"txmsg test hanging corks", test_txmsg_cork_hangs},