From 48e2cd3e3dcfe04f212df4fb189fa04c2a87b980 Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Fri, 26 Apr 2024 00:17:23 +0800 Subject: [PATCH 1/2] bpf: add mrtt and srtt as BPF_SOCK_OPS_RTT_CB args Two important arguments in RTT estimation, mrtt and srtt, are passed to tcp_bpf_rtt(), so that bpf programs get more information about the RTT computation in BPF_SOCK_OPS_RTT_CB. The difference between bpf_sock_ops->srtt_us and the srtt passed here is that the former holds the old srtt from before the update, while the srtt passed by tcp_bpf_rtt() is the value after the update. Signed-off-by: Philo Lu Link: https://lore.kernel.org/r/20240425161724.73707-2-lulie@linux.alibaba.com Signed-off-by: Martin KaFai Lau --- include/net/tcp.h | 4 ++-- include/uapi/linux/bpf.h | 2 ++ net/ipv4/tcp_input.c | 4 ++-- tools/include/uapi/linux/bpf.h | 2 ++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 6ae35199d3b3c..0f75d03287c25 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2706,10 +2706,10 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1); } -static inline void tcp_bpf_rtt(struct sock *sk) +static inline void tcp_bpf_rtt(struct sock *sk, long mrtt, u32 srtt) { if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_RTT_CB_FLAG)) - tcp_call_bpf(sk, BPF_SOCK_OPS_RTT_CB, 0, NULL); + tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_RTT_CB, mrtt, srtt); } #if IS_ENABLED(CONFIG_SMC) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e4ae83550fb39..d94a72593ead2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6947,6 +6947,8 @@ enum { * socket transition to LISTEN state. */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + * Arg1: measured RTT input (mrtt) + * Arg2: updated srtt */ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. 
* It will be called to handle diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5d874817a78db..d1115d7c3936a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -911,7 +911,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->rtt_seq = tp->snd_nxt; tp->mdev_max_us = tcp_rto_min_us(sk); - tcp_bpf_rtt(sk); + tcp_bpf_rtt(sk, mrtt_us, srtt); } } else { /* no previous measure. */ @@ -921,7 +921,7 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->mdev_max_us = tp->rttvar_us; tp->rtt_seq = tp->snd_nxt; - tcp_bpf_rtt(sk); + tcp_bpf_rtt(sk, mrtt_us, srtt); } tp->srtt_us = max(1U, srtt); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e4ae83550fb39..d94a72593ead2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6947,6 +6947,8 @@ enum { * socket transition to LISTEN state. */ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT. + * Arg1: measured RTT input (mrtt) + * Arg2: updated srtt */ BPF_SOCK_OPS_PARSE_HDR_OPT_CB, /* Parse the header option. * It will be called to handle From 7eb4f66b38069eec9c86c9d115f0bba1cf73ef2c Mon Sep 17 00:00:00 2001 From: Philo Lu Date: Fri, 26 Apr 2024 00:17:24 +0800 Subject: [PATCH 2/2] selftests/bpf: extend BPF_SOCK_OPS_RTT_CB test for srtt and mrtt_us Because srtt and mrtt_us are added as args in bpf_sock_ops at BPF_SOCK_OPS_RTT_CB, a simple check is added to make sure they are both non-zero. 
$ ./test_progs -t tcp_rtt #373 tcp_rtt:OK Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED Suggested-by: Stanislav Fomichev Signed-off-by: Philo Lu Link: https://lore.kernel.org/r/20240425161724.73707-3-lulie@linux.alibaba.com Signed-off-by: Martin KaFai Lau --- tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 14 ++++++++++++++ tools/testing/selftests/bpf/progs/tcp_rtt.c | 6 ++++++ 2 files changed, 20 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 8fe84da1b9b49..f2b99d95d9160 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -10,6 +10,9 @@ struct tcp_rtt_storage { __u32 delivered; __u32 delivered_ce; __u32 icsk_retransmits; + + __u32 mrtt_us; /* args[0] */ + __u32 srtt; /* args[1] */ }; static void send_byte(int fd) @@ -83,6 +86,17 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, err++; } + /* Precise values of mrtt and srtt are unavailable, just make sure they are nonzero */ + if (val.mrtt_us == 0) { + log_err("%s: unexpected bpf_tcp_sock.args[0] (mrtt_us) %u == 0", msg, val.mrtt_us); + err++; + } + + if (val.srtt == 0) { + log_err("%s: unexpected bpf_tcp_sock.args[1] (srtt) %u == 0", msg, val.srtt); + err++; + } + return err; } diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 0988d79f15877..42c729f855246 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -10,6 +10,9 @@ struct tcp_rtt_storage { __u32 delivered; __u32 delivered_ce; __u32 icsk_retransmits; + + __u32 mrtt_us; /* args[0] */ + __u32 srtt; /* args[1] */ }; struct { @@ -55,5 +58,8 @@ int _sockops(struct bpf_sock_ops *ctx) storage->delivered_ce = tcp_sk->delivered_ce; storage->icsk_retransmits = tcp_sk->icsk_retransmits; + storage->mrtt_us = ctx->args[0]; + storage->srtt = ctx->args[1]; + return 1; }