diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index ad31c9fb7158..08674cd14d5a 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs) psock_set_prog(&progs->skb_verdict, NULL); } +int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb); + +static inline bool sk_psock_strp_enabled(struct sk_psock *psock) +{ + if (!psock) + return false; + return psock->parser.enabled; +} #endif /* _LINUX_SKMSG_H */ diff --git a/include/net/tls.h b/include/net/tls.h index 3e7b44cae0d9..3212d3c214a9 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -571,6 +571,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk) return !!tls_sw_ctx_tx(ctx); } +static inline bool tls_sw_has_ctx_rx(const struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (!ctx) + return false; + return !!tls_sw_ctx_rx(ctx); +} + void tls_sw_write_space(struct sock *sk, struct tls_context *ctx); void tls_device_write_space(struct sock *sk, struct tls_context *ctx); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index c479372f2cd2..351afbf6bfba 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -7,6 +7,7 @@ #include #include +#include static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce) { @@ -682,13 +683,75 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp) return container_of(parser, struct sk_psock, parser); } -static void sk_psock_verdict_apply(struct sk_psock *psock, - struct sk_buff *skb, int verdict) +static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) { struct sk_psock *psock_other; struct sock *sk_other; bool ingress; + sk_other = tcp_skb_bpf_redirect_fetch(skb); + if (unlikely(!sk_other)) { + kfree_skb(skb); + return; + } + psock_other = sk_psock(sk_other); + if (!psock_other || sock_flag(sk_other, SOCK_DEAD) || + !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) { + kfree_skb(skb); + return; + } + + ingress = tcp_skb_bpf_ingress(skb); + if ((!ingress && sock_writeable(sk_other)) || + (ingress && + atomic_read(&sk_other->sk_rmem_alloc) <= + sk_other->sk_rcvbuf)) { + if (!ingress) + skb_set_owner_w(skb, sk_other); + skb_queue_tail(&psock_other->ingress_skb, skb); + schedule_work(&psock_other->work); + } else { + kfree_skb(skb); + } +} + +static void sk_psock_tls_verdict_apply(struct sk_psock *psock, + struct sk_buff *skb, int verdict) +{ + switch (verdict) { + case __SK_REDIRECT: + sk_psock_skb_redirect(psock, skb); + break; + case __SK_PASS: + case __SK_DROP: + default: + break; + } +} + +int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb) +{ + struct bpf_prog *prog; + int ret = __SK_PASS; + + rcu_read_lock(); + prog = READ_ONCE(psock->progs.skb_verdict); + if (likely(prog)) { + tcp_skb_bpf_redirect_clear(skb); + ret = sk_psock_bpf_run(psock, prog, skb); + ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); + } + rcu_read_unlock(); + sk_psock_tls_verdict_apply(psock, skb, ret); + return ret; +} +EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); + +static void sk_psock_verdict_apply(struct sk_psock *psock, + struct sk_buff *skb, int verdict) +{ + struct sock *sk_other; + switch (verdict) { case __SK_PASS: sk_other = psock->sk; @@ -707,25 +770,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock, } goto out_free; case __SK_REDIRECT: - sk_other = tcp_skb_bpf_redirect_fetch(skb); - if (unlikely(!sk_other)) - goto out_free; - psock_other = sk_psock(sk_other); - if (!psock_other || sock_flag(sk_other, SOCK_DEAD) || - !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) - goto out_free; - ingress = tcp_skb_bpf_ingress(skb); - if ((!ingress && sock_writeable(sk_other)) || - (ingress && - atomic_read(&sk_other->sk_rmem_alloc) <= - sk_other->sk_rcvbuf)) { - if (!ingress) - skb_set_owner_w(skb, sk_other); - skb_queue_tail(&psock_other->ingress_skb, skb); - schedule_work(&psock_other->work); - break; - } - /* fall-through */ + sk_psock_skb_redirect(psock, skb); + break; case __SK_DROP: /* fall-through */ default: @@ -779,9 +825,13 @@ static void sk_psock_strp_data_ready(struct sock *sk) rcu_read_lock(); psock = sk_psock(sk); if (likely(psock)) { - write_lock_bh(&sk->sk_callback_lock); - strp_data_ready(&psock->parser.strp); - write_unlock_bh(&sk->sk_callback_lock); + if (tls_sw_has_ctx_rx(sk)) { + psock->parser.saved_data_ready(sk); + } else { + write_lock_bh(&sk->sk_callback_lock); + strp_data_ready(&psock->parser.strp); + write_unlock_bh(&sk->sk_callback_lock); + } } rcu_read_unlock(); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8c2763eb6aae..24f64bc0de18 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1742,6 +1742,7 @@ int tls_sw_recvmsg(struct sock *sk, long timeo; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool is_peek = flags & MSG_PEEK; + bool bpf_strp_enabled; int num_async = 0; int pending; @@ -1752,6 +1753,7 @@ int tls_sw_recvmsg(struct sock *sk, psock = sk_psock_get(sk); lock_sock(sk); + bpf_strp_enabled = sk_psock_strp_enabled(psock); /* Process pending decrypted records. It must be non-zero-copy */ err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false, @@ -1805,11 +1807,12 @@ int tls_sw_recvmsg(struct sock *sk, if (to_decrypt <= len && !is_kvec && !is_peek && ctx->control == TLS_RECORD_TYPE_DATA && - prot->version != TLS_1_3_VERSION) + prot->version != TLS_1_3_VERSION && + !bpf_strp_enabled) zc = true; /* Do not use async mode if record is non-data */ - if (ctx->control == TLS_RECORD_TYPE_DATA) + if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled) async_capable = ctx->async_capable; else async_capable = false; @@ -1859,6 +1862,19 @@ int tls_sw_recvmsg(struct sock *sk, goto pick_next_record; if (!zc) { + if (bpf_strp_enabled) { + err = sk_psock_tls_strp_read(psock, skb); + if (err != __SK_PASS) { + rxm->offset = rxm->offset + rxm->full_len; + rxm->full_len = 0; + if (err == __SK_DROP) + consume_skb(skb); + ctx->recv_pkt = NULL; + __strp_unpause(&ctx->strp); + continue; + } + } + if (rxm->full_len > len) { retain_skb = true; chunk = len; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index a443d3637db3..057036ca1111 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -79,11 +79,18 @@ struct { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); + __uint(max_entries, 2); __type(key, int); __type(value, int); } sock_skb_opts SEC(".maps"); +struct { + __uint(type, TEST_MAP_TYPE); + __uint(max_entries, 20); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} tls_sock_map SEC(".maps"); + SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { @@ -118,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb) } +SEC("sk_skb3") +int bpf_prog3(struct __sk_buff *skb) +{ + const int one = 1; + int err, *f, ret = SK_PASS; + void *data_end; + char *c; + + err = bpf_skb_pull_data(skb, 19); + if (err) + goto tls_out; + + c = (char *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + if (c + 18 < data_end) + memcpy(&c[13], "PASS", 4); + f = bpf_map_lookup_elem(&sock_skb_opts, &one); + if (f && *f) { + __u64 flags = 0; + + ret = 0; + flags = *f; +#ifdef SOCKMAP + return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags); +#else + return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); +#endif + } + + f = bpf_map_lookup_elem(&sock_skb_opts, &one); + if (f && *f) + ret = SK_DROP; +tls_out: + return ret; +} + SEC("sockops") int bpf_sockmap(struct bpf_sock_ops *skops) { diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index c80643828b82..37695fc8096a 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -63,8 +63,8 @@ int s1, s2, c1, c2, p1, p2; int test_cnt; int passed; int failed; -int map_fd[8]; -struct bpf_map *maps[8]; +int map_fd[9]; +struct bpf_map *maps[9]; int prog_fd[11]; int txmsg_pass; @@ -79,7 +79,10 @@ int txmsg_end_push; int txmsg_start_pop; int txmsg_pop; int txmsg_ingress; -int txmsg_skb; +int txmsg_redir_skb; +int txmsg_ktls_skb; +int txmsg_ktls_skb_drop; +int txmsg_ktls_skb_redir; int ktls; int peek_flag; @@ -104,7 +107,7 @@ static const struct option long_options[] = { {"txmsg_start_pop", required_argument, NULL, 'w'}, {"txmsg_pop", required_argument, NULL, 'x'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, - {"txmsg_skb", no_argument, &txmsg_skb, 1 }, + {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, {"peek", no_argument, &peek_flag, 1 }, {"whitelist", required_argument, NULL, 'n' }, @@ -169,7 +172,8 @@ static void test_reset(void) txmsg_start_push = txmsg_end_push = 0; txmsg_pass = txmsg_drop = txmsg_redir = 0; txmsg_apply = txmsg_cork = 0; - txmsg_ingress = txmsg_skb = 0; + txmsg_ingress = txmsg_redir_skb = 0; + txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; } static int test_start_subtest(const struct _test *t, struct sockmap_options *o) @@ -502,14 +506,41 @@ unwind_iov: static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) { - int i, j, bytes_cnt = 0; + int i, j = 0, bytes_cnt = 0; unsigned char k = 0; for (i = 0; i < msg->msg_iovlen; i++) { unsigned char *d = msg->msg_iov[i].iov_base; - for (j = 0; - j < msg->msg_iov[i].iov_len && size; j++) { + /* Special case test for skb ingress + ktls */ + if (i == 0 && txmsg_ktls_skb) { + if (msg->msg_iov[i].iov_len < 4) + return -EIO; + if (txmsg_ktls_skb_redir) { + if (memcmp(&d[13], "PASS", 4) != 0) { + fprintf(stderr, + "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]); + return -EIO; + } + d[13] = 0; + d[14] = 1; + d[15] = 2; + d[16] = 3; + j = 13; + } else if (txmsg_ktls_skb) { + if (memcmp(d, "PASS", 4) != 0) { + fprintf(stderr, + "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); + return -EIO; + } + d[0] = 0; + d[1] = 1; + d[2] = 2; + d[3] = 3; + } + } + + for (; j < msg->msg_iov[i].iov_len && size; j++) { if (d[j] != k++) { fprintf(stderr, "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", @@ -724,7 +755,7 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { iov_buf -= (txmsg_pop - txmsg_start_pop + 1); - if (opt->drop_expected) + if (opt->drop_expected || txmsg_ktls_skb_drop) _exit(0); if (!iov_buf) /* zero bytes sent case */ @@ -911,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) return err; } + /* Attach programs to TLS sockmap */ + if (txmsg_ktls_skb) { + err = bpf_prog_attach(prog_fd[0], map_fd[8], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[8], err, strerror(errno)); + return err; + } + + err = bpf_prog_attach(prog_fd[2], map_fd[8], + BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) { + fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n", + err, strerror(errno)); + return err; + } + } + /* Attach to cgroups */ - err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0); + err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0); if (err) { fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n", err, strerror(errno)); @@ -928,15 +979,15 @@ run: /* Attach txmsg program to sockmap */ if (txmsg_pass) - tx_prog_fd = prog_fd[3]; - else if (txmsg_redir) tx_prog_fd = prog_fd[4]; - else if (txmsg_apply) + else if (txmsg_redir) tx_prog_fd = prog_fd[5]; - else if (txmsg_cork) + else if (txmsg_apply) tx_prog_fd = prog_fd[6]; - else if (txmsg_drop) + else if (txmsg_cork) tx_prog_fd = prog_fd[7]; + else if (txmsg_drop) + tx_prog_fd = prog_fd[8]; else tx_prog_fd = 0; @@ -1108,7 +1159,35 @@ run: } } - if (txmsg_skb) { + if (txmsg_ktls_skb) { + int ingress = BPF_F_INGRESS; + + i = 0; + err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n", + err, strerror(errno)); + } + + if (txmsg_ktls_skb_redir) { + i = 1; + err = bpf_map_update_elem(map_fd[7], + &i, &ingress, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n", + err, strerror(errno)); + } + } + + if (txmsg_ktls_skb_drop) { + i = 1; + err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY); + } + } + + if (txmsg_redir_skb) { int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1; int ingress = BPF_F_INGRESS; @@ -1123,8 +1202,7 @@ run: } i = 3; - err = bpf_map_update_elem(map_fd[0], - &i, &skb_fd, BPF_ANY); + err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY); if (err) { fprintf(stderr, "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n", @@ -1158,9 +1236,12 @@ run: fprintf(stderr, "unknown test\n"); out: /* Detatch and zero all the maps */ - bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS); + bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS); bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER); bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT); + bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER); + bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT); + if (tx_prog_fd >= 0) bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT); @@ -1229,8 +1310,10 @@ static void test_options(char *options) } if (txmsg_ingress) strncat(options, "ingress,", OPTSTRING); - if (txmsg_skb) - strncat(options, "skb,", OPTSTRING); + if (txmsg_redir_skb) + strncat(options, "redir_skb,", OPTSTRING); + if (txmsg_ktls_skb) + strncat(options, "ktls_skb,", OPTSTRING); if (ktls) strncat(options, "ktls,", OPTSTRING); if (peek_flag) @@ -1362,6 +1445,40 @@ static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt) test_send(opt, cgrp); } +static void test_txmsg_skb(int cgrp, struct sockmap_options *opt) +{ + bool data = opt->data_test; + int k = ktls; + + opt->data_test = true; + ktls = 1; + + txmsg_pass = txmsg_drop = 0; + txmsg_ingress = txmsg_redir = 0; + txmsg_ktls_skb = 1; + txmsg_pass = 1; + + /* Using data verification so ensure iov layout is + * expected from test receiver side. e.g. has enough + * bytes to write test code. + */ + opt->iov_length = 100; + opt->iov_count = 1; + opt->rate = 1; + test_exec(cgrp, opt); + + txmsg_ktls_skb_drop = 1; + test_exec(cgrp, opt); + + txmsg_ktls_skb_drop = 0; + txmsg_ktls_skb_redir = 1; + test_exec(cgrp, opt); + + opt->data_test = data; + ktls = k; +} + + /* Test cork with hung data. This tests poor usage patterns where * cork can leave data on the ring if user program is buggy and * doesn't flush them somehow. They do take some time however @@ -1542,11 +1659,13 @@ char *map_names[] = { "sock_bytes", "sock_redir_flags", "sock_skb_opts", + "tls_sock_map", }; int prog_attach_type[] = { BPF_SK_SKB_STREAM_PARSER, BPF_SK_SKB_STREAM_VERDICT, + BPF_SK_SKB_STREAM_VERDICT, BPF_CGROUP_SOCK_OPS, BPF_SK_MSG_VERDICT, BPF_SK_MSG_VERDICT, @@ -1558,6 +1677,7 @@ int prog_attach_type[] = { }; int prog_type[] = { + BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_SK_SKB, BPF_PROG_TYPE_SOCK_OPS, @@ -1620,6 +1740,7 @@ struct _test test[] = { {"txmsg test redirect", test_txmsg_redir}, {"txmsg test drop", test_txmsg_drop}, {"txmsg test ingress redirect", test_txmsg_ingress_redir}, + {"txmsg test skb", test_txmsg_skb}, {"txmsg test apply", test_txmsg_apply}, {"txmsg test cork", test_txmsg_cork}, {"txmsg test hanging corks", test_txmsg_cork_hangs},