From 34efc9cfe7c6d11ccc438ca03b59a1bf43cc60ec Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:17 -0800 Subject: [PATCH 1/8] tcp: Clean up reverse xmas tree in cookie_v[46]_check(). We will grow and cut the xmas tree in cookie_v[46]_check(). This patch cleans it up to make later patches tidy. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 10 +++++----- net/ipv6/syncookies.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d37282c06e3da..a0118ea76734f 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -331,18 +331,18 @@ EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; + const struct tcphdr *th = tcp_hdr(skb); + __u32 cookie = ntohl(th->ack_seq) - 1; struct tcp_options_received tcp_opt; + struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; struct tcp_request_sock *treq; - struct tcp_sock *tp = tcp_sk(sk); - const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; - struct sock *ret = sk; struct request_sock *req; + struct sock *ret = sk; int full_space, mss; + struct flowi4 fl4; struct rtable *rt; __u8 rcv_wscale; - struct flowi4 fl4; u32 tsoff = 0; int l3index; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 12eedc6ca2cca..aa5fb5486cde9 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -127,17 +127,17 @@ EXPORT_SYMBOL_GPL(__cookie_v6_check); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { + const struct tcphdr *th = tcp_hdr(skb); + __u32 cookie = ntohl(th->ack_seq) - 1; + struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tcp_opt; + struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; struct tcp_request_sock *treq; - struct ipv6_pinfo *np = inet6_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; - struct sock *ret = sk; struct request_sock *req; - int full_space, mss; struct dst_entry *dst; + struct sock *ret = sk; + int full_space, mss; __u8 rcv_wscale; u32 tsoff = 0; int l3index; From 45c28509fee6b4c867374b83c5b9e10dac245988 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:18 -0800 Subject: [PATCH 2/8] tcp: Cache sock_net(sk) in cookie_v[46]_check(). sock_net(sk) is used repeatedly in cookie_v[46]_check(). Let's cache it in a variable. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-3-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 21 +++++++++++---------- net/ipv6/syncookies.c | 19 ++++++++++--------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index a0118ea76734f..fb41bb18fe6b7 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -336,6 +336,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; + struct net *net = sock_net(sk); struct tcp_request_sock *treq; struct request_sock *req; struct sock *ret = sk; @@ -346,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) u32 tsoff = 0; int l3index; - if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) goto out; @@ -355,24 +356,24 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v4_check(ip_hdr(skb), th, cookie); if (mss == 0) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcp_ts_off(sock_net(sk), + tsoff = secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) + if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; ret = NULL; @@ -406,13 +407,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_iif = inet_request_bound_dev_if(sk, skb); - l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); @@ -433,7 +434,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) opt->srr ? opt->faddr : ireq->ir_rmt_addr, ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); - rt = ip_route_output_key(sock_net(sk), &fl4); + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { reqsk_free(req); goto out; @@ -453,7 +454,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst); + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst); ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); /* ip_queue_xmit() depends on our flow being setup diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index aa5fb5486cde9..ba394fa73f41f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -133,6 +133,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; + struct net *net = sock_net(sk); struct tcp_request_sock *treq; struct request_sock *req; struct dst_entry *dst; @@ -142,7 +143,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) u32 tsoff = 0; int l3index; - if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) || + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) goto out; @@ -151,24 +152,24 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); if (mss == 0) { - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } - __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV); + __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV); /* check for timestamp cookie support */ memset(&tcp_opt, 0, sizeof(tcp_opt)); - tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL); + tcp_parse_options(net, skb, &tcp_opt, 0, NULL); if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) { - tsoff = secure_tcpv6_ts_off(sock_net(sk), + tsoff = secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); tcp_opt.rcv_tsecr -= tsoff; } - if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt)) + if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; ret = NULL; @@ -217,7 +218,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->ts_off = 0; treq->txhash = net_tx_rndhash(); - l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); + l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); if (IS_ENABLED(CONFIG_SMC)) @@ -243,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) fl6.flowi6_uid = sk->sk_uid; security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); - dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); + dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) goto out_free; } @@ -261,7 +262,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); + ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); out: From 50468cddd6bc27e75e7377e376674d40fd1b1d73 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:19 -0800 Subject: [PATCH 3/8] tcp: Clean up goto labels in cookie_v[46]_check(). We will support arbitrary SYN Cookie with BPF, and then reqsk will be preallocated before cookie_v[46]_check(). Depending on how validation fails, we send RST or just drop skb. To make the error handling easier, let's clean up goto labels. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-4-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 22 +++++++++++----------- net/ipv6/syncookies.c | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index fb41bb18fe6b7..8b7d7d7788afa 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -376,11 +376,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - ret = NULL; req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, &tcp_request_sock_ipv4_ops, sk, skb); if (!req) - goto out; + goto out_drop; ireq = inet_rsk(req); treq = tcp_rsk(req); @@ -415,10 +414,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) */ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); - if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); - goto out; - } + if (security_inet_conn_request(sk, skb, req)) + goto out_free; req->num_retrans = 0; @@ -435,10 +432,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid); security_req_classify_flow(req, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_key(net, &fl4); - if (IS_ERR(rt)) { - reqsk_free(req); - goto out; - } + if (IS_ERR(rt)) + goto out_free; /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); @@ -462,5 +457,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) */ if (ret) inet_sk(ret)->cork.fl.u.ip4 = fl4; -out: return ret; +out: + return ret; +out_free: + reqsk_free(req); +out_drop: + return NULL; } diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ba394fa73f41f..106376cbc9de5 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -172,11 +172,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - ret = NULL; req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, &tcp_request_sock_ipv6_ops, sk, skb); if (!req) - goto out; + goto out_drop; ireq = inet_rsk(req); treq = tcp_rsk(req); @@ -269,5 +268,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) return ret; out_free: reqsk_free(req); +out_drop: return NULL; } From 7577bc8249c3fc86096ef1b1c9a8f4b6232231e7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:20 -0800 Subject: [PATCH 4/8] tcp: Don't pass cookie to __cookie_v[46]_check(). tcp_hdr(skb) and SYN Cookie are passed to __cookie_v[46]_check(), but none of the callers passes cookie other than ntohl(th->ack_seq) - 1. Let's fetch it in __cookie_v[46]_check() instead of passing the cookie over and over. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-5-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/linux/netfilter_ipv6.h | 8 ++++---- include/net/tcp.h | 6 ++---- net/core/filter.c | 15 ++++----------- net/ipv4/syncookies.c | 15 ++++++++------- net/ipv6/syncookies.c | 15 ++++++++------- net/netfilter/nf_synproxy_core.c | 4 ++-- 6 files changed, 28 insertions(+), 35 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 7834c0be2831d..61aa48f46dd72 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -51,7 +51,7 @@ struct nf_ipv6_ops { u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); int (*cookie_v6_check)(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie); + const struct tcphdr *th); #endif void (*route_input)(struct sk_buff *skb); int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, @@ -179,16 +179,16 @@ static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, - const struct tcphdr *th, __u32 cookie) + const struct tcphdr *th) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) - return v6_ops->cookie_v6_check(iph, th, cookie); + return v6_ops->cookie_v6_check(iph, th); #elif IS_BUILTIN(CONFIG_IPV6) - return __cookie_v6_check(iph, th, cookie); + return __cookie_v6_check(iph, th); #endif #endif return 0; diff --git a/include/net/tcp.h b/include/net/tcp.h index d2f0736b76b8b..2b2c79c7bbcd5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -491,8 +491,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, u32 tsoff); -int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, - u32 cookie); +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, const struct tcp_request_sock_ops *af_ops, @@ -586,8 +585,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *opt, const struct net *net, const struct dst_entry *dst); /* From net/ipv6/syncookies.c */ -int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, - u32 cookie); +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, diff --git a/net/core/filter.c b/net/core/filter.c index 7e4d7c3bcc849..0adaa4afa35f2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7238,7 +7238,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len struct tcphdr *, th, u32, th_len) { #ifdef CONFIG_SYN_COOKIES - u32 cookie; int ret; if (unlikely(!sk || th_len < sizeof(*th))) @@ -7260,8 +7259,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (tcp_synq_no_recent_overflow(sk)) return -ENOENT; - cookie = ntohl(th->ack_seq) - 1; - /* Both struct iphdr and struct ipv6hdr have the version field at the * same offset so we can cast to the shorter header (struct iphdr). */ @@ -7270,7 +7267,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk)) return -EINVAL; - ret = __cookie_v4_check((struct iphdr *)iph, th, cookie); + ret = __cookie_v4_check((struct iphdr *)iph, th); break; #if IS_BUILTIN(CONFIG_IPV6) @@ -7281,7 +7278,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len if (sk->sk_family != AF_INET6) return -EINVAL; - ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie); + ret = __cookie_v6_check((struct ipv6hdr *)iph, th); break; #endif /* CONFIG_IPV6 */ @@ -7734,9 +7731,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = { BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph, struct tcphdr *, th) { - u32 cookie = ntohl(th->ack_seq) - 1; - - if (__cookie_v4_check(iph, th, cookie) > 0) + if (__cookie_v4_check(iph, th) > 0) return 0; return -EACCES; @@ -7757,9 +7752,7 @@ BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th) { #if IS_BUILTIN(CONFIG_IPV6) - u32 cookie = ntohl(th->ack_seq) - 1; - - if (__cookie_v6_check(iph, th, cookie) > 0) + if (__cookie_v6_check(iph, th) > 0) return 0; return -EACCES; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8b7d7d7788afa..c08428d63d11d 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -189,12 +189,14 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) * Check if a ack sequence number is a valid syncookie. * Return the decoded mss if it is, or 0 if not. */ -int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, - u32 cookie) +int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th) { + __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; - __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, - th->source, th->dest, seq); + __u32 mssind; + + mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr, + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -332,7 +334,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); struct inet_request_sock *ireq; @@ -354,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v4_check(ip_hdr(skb), th, cookie); + mss = __cookie_v4_check(ip_hdr(skb), th); if (mss == 0) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; @@ -384,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = cookie; + treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); req->mss = mss; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 106376cbc9de5..4cd26c4811683 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -114,12 +114,14 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) return __cookie_v6_init_sequence(iph, th, mssp); } -int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, - __u32 cookie) +int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { + __u32 cookie = ntohl(th->ack_seq) - 1; __u32 seq = ntohl(th->seq) - 1; - __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, - th->source, th->dest, seq); + __u32 mssind; + + mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, + th->source, th->dest, seq); return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0; } @@ -128,7 +130,6 @@ EXPORT_SYMBOL_GPL(__cookie_v6_check); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); - __u32 cookie = ntohl(th->ack_seq) - 1; struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tcp_opt; struct tcp_sock *tp = tcp_sk(sk); @@ -150,7 +151,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie); + mss = __cookie_v6_check(ipv6_hdr(skb), th); if (mss == 0) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; @@ -213,7 +214,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = 0; treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = cookie; + treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = 0; treq->txhash = net_tx_rndhash(); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 467671f2d42f7..fbbc4fd373495 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -617,7 +617,7 @@ synproxy_recv_client_ack(struct net *net, struct synproxy_net *snet = synproxy_pernet(net); int mss; - mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); + mss = __cookie_v4_check(ip_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; @@ -1034,7 +1034,7 @@ synproxy_recv_client_ack_ipv6(struct net *net, struct synproxy_net *snet = synproxy_pernet(net); int mss; - mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1); + mss = nf_cookie_v6_check(ipv6_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; From efce3d1fdff53ef45b11ff407f16bc2f6f292b93 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:21 -0800 Subject: [PATCH 5/8] tcp: Don't initialise tp->tsoffset in tcp_get_cookie_sock(). When we create a full socket from SYN Cookie, we initialise tcp_sk(sk)->tsoffset redundantly in tcp_get_cookie_sock() as the field is inherited from tcp_rsk(req)->ts_off. cookie_v[46]_check |- treq->ts_off = 0 `- tcp_get_cookie_sock |- tcp_v[46]_syn_recv_sock | `- tcp_create_openreq_child | `- newtp->tsoffset = treq->ts_off `- tcp_sk(child)->tsoffset = tsoff Let's initialise tcp_rsk(req)->ts_off with the correct offset and remove the second initialisation of tcp_sk(sk)->tsoffset. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-6-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- net/ipv4/syncookies.c | 7 +++---- net/ipv6/syncookies.c | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 2b2c79c7bbcd5..cc7143a781da8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -490,7 +490,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst, u32 tsoff); + struct dst_entry *dst); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c08428d63d11d..de051eb08db85 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -204,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check); struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst, u32 tsoff) + struct dst_entry *dst) { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; @@ -214,7 +214,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, NULL, &own_req); if (child) { refcount_set(&req->rsk_refcnt, 1); - tcp_sk(child)->tsoffset = tsoff; sock_rps_save_rxhash(child, skb); if (rsk_drop_req(req)) { @@ -386,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = ntohl(th->ack_seq) - 1; - treq->ts_off = 0; + treq->ts_off = tsoff; treq->txhash = net_tx_rndhash(); req->mss = mss; ireq->ir_num = ntohs(th->dest); @@ -452,7 +451,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst); - ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff); + ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); /* ip_queue_xmit() depends on our flow being setup * Normal sockets get it right from inet_csk_route_child_sock() */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 4cd26c4811683..18c2e3c1677b5 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -215,7 +215,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->snt_synack = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = ntohl(th->ack_seq) - 1; - treq->ts_off = 0; + treq->ts_off = tsoff; treq->txhash = net_tx_rndhash(); l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); @@ -264,7 +264,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst); - ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff); + ret = tcp_get_cookie_sock(sk, skb, req, dst); out: return ret; out_free: From 7b0f570f879adecf12329ecd60485e7e6b4783c1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:22 -0800 Subject: [PATCH 6/8] tcp: Move TCP-AO bits from cookie_v[46]_check() to tcp_ao_syncookie(). We initialise treq->af_specific in cookie_tcp_reqsk_alloc() so that we can look up a key later in tcp_create_openreq_child(). Initially, that change was added for MD5 by commit ba5a4fdd63ae ("tcp: make sure treq->af_specific is initialized"), but it has not been used since commit d0f2b7a9ca0a ("tcp: Disable header prediction for MD5 flow."). Now, treq->af_specific is used only by TCP-AO, so, we can move that initialisation into tcp_ao_syncookie(). In addition to that, l3index in cookie_v[46]_check() is only used for tcp_ao_syncookie(), so let's move it as well. While at it, we move down tcp_ao_syncookie() in cookie_v4_check() so that it will be called after security_inet_conn_request() to make functions order consistent with cookie_v6_check(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-7-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 - include/net/tcp_ao.h | 6 ++---- net/ipv4/syncookies.c | 16 ++++------------ net/ipv4/tcp_ao.c | 16 ++++++++++++++-- net/ipv6/syncookies.c | 7 ++----- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index cc7143a781da8..d4d0e97631756 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -494,7 +494,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index b56be10838f09..4d900ef8dfc3e 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -265,8 +265,7 @@ void tcp_ao_established(struct sock *sk); void tcp_ao_finish_connect(struct sock *sk, struct sk_buff *skb); void tcp_ao_connect_init(struct sock *sk); void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, - struct tcp_request_sock *treq, - unsigned short int family, int l3index); + struct request_sock *req, unsigned short int family); #else /* CONFIG_TCP_AO */ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, @@ -277,8 +276,7 @@ static inline int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, } static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, - struct tcp_request_sock *treq, - unsigned short int family, int l3index) + struct request_sock *req, unsigned short int family) { } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index de051eb08db85..1e3783c97e28b 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -286,9 +286,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, EXPORT_SYMBOL(cookie_ecn_ok); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - const struct tcp_request_sock_ops *af_ops, - struct sock *sk, - struct sk_buff *skb) + struct sock *sk, struct sk_buff *skb) { struct tcp_request_sock *treq; struct request_sock *req; @@ -303,9 +301,6 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, treq = tcp_rsk(req); - /* treq->af_specific might be used to perform TCP_MD5 lookup */ - treq->af_specific = af_ops; - treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; treq->req_usec_ts = false; @@ -345,7 +340,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) struct rtable *rt; __u8 rcv_wscale; u32 tsoff = 0; - int l3index; if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -376,8 +370,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, - &tcp_request_sock_ipv4_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); if (!req) goto out_drop; @@ -406,9 +399,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->ir_iif = inet_request_bound_dev_if(sk, skb); - l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); - tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index); - /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ @@ -417,6 +407,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (security_inet_conn_request(sk, skb, req)) goto out_free; + tcp_ao_syncookie(sk, skb, req, AF_INET); + req->num_retrans = 0; /* diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 7696417d06401..c4cd1e09eb6b2 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -844,18 +844,30 @@ static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family, } void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb, - struct tcp_request_sock *treq, - unsigned short int family, int l3index) + struct request_sock *req, unsigned short int family) { + struct tcp_request_sock *treq = tcp_rsk(req); const struct tcphdr *th = tcp_hdr(skb); const struct tcp_ao_hdr *aoh; struct tcp_ao_key *key; + int l3index; + + /* treq->af_specific is used to perform TCP_AO lookup + * in tcp_create_openreq_child(). + */ +#if IS_ENABLED(CONFIG_IPV6) + if (family == AF_INET6) + treq->af_specific = &tcp_request_sock_ipv6_ops; + else +#endif + treq->af_specific = &tcp_request_sock_ipv4_ops; treq->maclen = 0; if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh) return; + l3index = l3mdev_master_ifindex_by_index(sock_net(sk), inet_rsk(req)->ir_iif); key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index); if (!key) /* Key not found, continue without TCP-AO */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 18c2e3c1677b5..12b1809245f99 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -142,7 +142,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) int full_space, mss; __u8 rcv_wscale; u32 tsoff = 0; - int l3index; if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || !th->ack || th->rst) @@ -173,8 +172,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, - &tcp_request_sock_ipv6_ops, sk, skb); + req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); if (!req) goto out_drop; @@ -218,8 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->ts_off = tsoff; treq->txhash = net_tx_rndhash(); - l3index = l3mdev_master_ifindex_by_index(net, ireq->ir_iif); - tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index); + tcp_ao_syncookie(sk, skb, req, AF_INET6); if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; From de5626b95e13a054aa80f890f2a774d2fc18d855 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:23 -0800 Subject: [PATCH 7/8] tcp: Factorise cookie-independent fields initialisation in cookie_v[46]_check(). We will support arbitrary SYN Cookie with BPF, and then some reqsk fields are initialised in kfunc, and others are done in cookie_v[46]_check(). This patch factorises the common part as cookie_tcp_reqsk_init() and calls it in cookie_tcp_reqsk_alloc() to minimise the discrepancy between cookie_v[46]_check(). Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-8-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 68 +++++++++++++++++++++++-------------------- net/ipv6/syncookies.c | 14 --------- 2 files changed, 37 insertions(+), 45 deletions(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 1e3783c97e28b..f4bcd4822fe07 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -285,10 +285,43 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, } EXPORT_SYMBOL(cookie_ecn_ok); +static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct inet_request_sock *ireq = inet_rsk(req); + struct tcp_request_sock *treq = tcp_rsk(req); + const struct tcphdr *th = tcp_hdr(skb); + + req->num_retrans = 0; + + ireq->ir_num = ntohs(th->dest); + ireq->ir_rmt_port = th->source; + ireq->ir_iif = inet_request_bound_dev_if(sk, skb); + ireq->ir_mark = inet_request_mark(sk, skb); + + if (IS_ENABLED(CONFIG_SMC)) + ireq->smc_ok = 0; + + treq->snt_synack = 0; + treq->tfo_listener = false; + treq->txhash = net_tx_rndhash(); + treq->rcv_isn = ntohl(th->seq) - 1; + treq->snt_isn = ntohl(th->ack_seq) - 1; + treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; + treq->req_usec_ts = false; + +#if IS_ENABLED(CONFIG_MPTCP) + treq->is_mptcp = sk_is_mptcp(sk); + if (treq->is_mptcp) + return mptcp_subflow_init_cookie_req(req, sk, skb); +#endif + + return 0; +} + struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk, struct sk_buff *skb) { - struct tcp_request_sock *treq; struct request_sock *req; if (sk_is_mptcp(sk)) @@ -299,22 +332,10 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, if (!req) return NULL; - treq = tcp_rsk(req); - - treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = false; - -#if IS_ENABLED(CONFIG_MPTCP) - treq->is_mptcp = sk_is_mptcp(sk); - if (treq->is_mptcp) { - int err = mptcp_subflow_init_cookie_req(req, sk, skb); - - if (err) { - reqsk_free(req); - return NULL; - } + if (cookie_tcp_reqsk_init(sk, skb, req)) { + reqsk_free(req); + return NULL; } -#endif return req; } @@ -376,28 +397,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = tsoff; - treq->txhash = net_tx_rndhash(); req->mss = mss; - ireq->ir_num = ntohs(th->dest); - ireq->ir_rmt_port = th->source; sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->ir_mark = inet_request_mark(sk, skb); ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = 0; - treq->tfo_listener = false; - - if (IS_ENABLED(CONFIG_SMC)) - ireq->smc_ok = 0; - - ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -409,8 +417,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) tcp_ao_syncookie(sk, skb, req, AF_INET); - req->num_retrans = 0; - /* * We need to lookup the route here to get at the correct * window size. We should better make sure that the window size diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 12b1809245f99..e0a9220d1536a 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -178,11 +178,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->tfo_listener = false; req->mss = mss; - ireq->ir_rmt_port = th->source; - ireq->ir_num = ntohs(th->dest); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -196,31 +193,20 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->pktopts = skb; } - ireq->ir_iif = inet_request_bound_dev_if(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); - ireq->ir_mark = inet_request_mark(sk, skb); - - req->num_retrans = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = 0; - treq->rcv_isn = ntohl(th->seq) - 1; - treq->snt_isn = ntohl(th->ack_seq) - 1; treq->ts_off = tsoff; - treq->txhash = net_tx_rndhash(); tcp_ao_syncookie(sk, skb, req, AF_INET6); - if (IS_ENABLED(CONFIG_SMC)) - ireq->smc_ok = 0; - /* * We need to lookup the dst_entry to get the correct window size. * This is taken from tcp_v6_syn_recv_sock. Somebody please enlighten From 8e7bab6b9652cd07a05ee0380b623c0e00e3eb00 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 28 Nov 2023 18:29:24 -0800 Subject: [PATCH 8/8] tcp: Factorise cookie-dependent fields initialisation in cookie_v[46]_check() We will support arbitrary SYN Cookie with BPF, and then kfunc at TC will preallocate reqsk and initialise some fields that should not be overwritten later by cookie_v[46]_check(). To simplify the flow in cookie_v[46]_check(), we move such fields' initialisation to cookie_tcp_reqsk_alloc() and factorise non-BPF SYN Cookie handling into cookie_tcp_check(), where we validate the cookie and allocate reqsk, as done by kfunc later. Note that we set ireq->ecn_ok in two steps, the latter of which will be shared by the BPF case. As cookie_ecn_ok() is one-liner, now it's inlined. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20231129022924.96156-9-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 13 ++++-- net/ipv4/syncookies.c | 106 +++++++++++++++++++++++------------------- net/ipv6/syncookies.c | 61 ++++++++++++------------ 3 files changed, 99 insertions(+), 81 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index d4d0e97631756..973555cb1d3f5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -494,7 +494,10 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - struct sock *sk, struct sk_buff *skb); + struct sock *sk, struct sk_buff *skb, + struct tcp_options_received *tcp_opt, + int mss, u32 tsoff); + #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. @@ -580,8 +583,12 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); u64 cookie_init_timestamp(struct request_sock *req, u64 now); bool cookie_timestamp_decode(const struct net *net, struct tcp_options_received *opt); -bool cookie_ecn_ok(const struct tcp_options_received *opt, - const struct net *net, const struct dst_entry *dst); + +static inline bool cookie_ecn_ok(const struct net *net, const struct dst_entry *dst) +{ + return READ_ONCE(net->ipv4.sysctl_tcp_ecn) || + dst_feature(dst, RTAX_FEATURE_ECN); +} /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f4bcd4822fe07..61f1c96cfe63e 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -270,21 +270,6 @@ bool cookie_timestamp_decode(const struct net *net, } EXPORT_SYMBOL(cookie_timestamp_decode); -bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, - const struct net *net, const struct dst_entry *dst) -{ - bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN; - - if (!ecn_ok) - return false; - - if (READ_ONCE(net->ipv4.sysctl_tcp_ecn)) - return true; - - return dst_feature(dst, RTAX_FEATURE_ECN); -} -EXPORT_SYMBOL(cookie_ecn_ok); - static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { @@ -320,8 +305,12 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, } struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, - struct sock *sk, struct sk_buff *skb) + struct sock *sk, struct sk_buff *skb, + struct tcp_options_received *tcp_opt, + int mss, u32 tsoff) { + struct inet_request_sock *ireq; + struct tcp_request_sock *treq; struct request_sock *req; if (sk_is_mptcp(sk)) @@ -337,40 +326,36 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, return NULL; } + ireq = inet_rsk(req); + treq = tcp_rsk(req); + + req->mss = mss; + req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0; + + ireq->snd_wscale = tcp_opt->snd_wscale; + ireq->tstamp_ok = tcp_opt->saw_tstamp; + ireq->sack_ok = tcp_opt->sack_ok; + ireq->wscale_ok = tcp_opt->wscale_ok; + ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN); + + treq->ts_off = tsoff; + return req; } EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc); -/* On input, sk is a listener. - * Output is listener if incoming packet would not create a child - * NULL if memory could not be allocated. - */ -struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, + struct sk_buff *skb) { - struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; - const struct tcphdr *th = tcp_hdr(skb); struct tcp_options_received tcp_opt; - struct tcp_sock *tp = tcp_sk(sk); - struct inet_request_sock *ireq; - struct net *net = sock_net(sk); - struct tcp_request_sock *treq; - struct request_sock *req; - struct sock *ret = sk; - int full_space, mss; - struct flowi4 fl4; - struct rtable *rt; - __u8 rcv_wscale; u32 tsoff = 0; - - if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || - !th->ack || th->rst) - goto out; + int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v4_check(ip_hdr(skb), th); - if (mss == 0) { + mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb)); + if (!mss) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } @@ -391,21 +376,44 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb); + return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb, + &tcp_opt, mss, tsoff); +out: + return ERR_PTR(-EINVAL); +} + +/* On input, sk is a listener. + * Output is listener if incoming packet would not create a child + * NULL if memory could not be allocated. + */ +struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) +{ + struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; + const struct tcphdr *th = tcp_hdr(skb); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_request_sock *ireq; + struct net *net = sock_net(sk); + struct request_sock *req; + struct sock *ret = sk; + struct flowi4 fl4; + struct rtable *rt; + __u8 rcv_wscale; + int full_space; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) + goto out; + + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; if (!req) goto out_drop; ireq = inet_rsk(req); - treq = tcp_rsk(req); - treq->ts_off = tsoff; - req->mss = mss; + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) @@ -447,7 +455,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) dst_metric(&rt->dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, &rt->dst); + ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst); ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst); /* ip_queue_xmit() depends on our flow being setup diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index e0a9220d1536a..c8d2ca27220cc 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -127,31 +127,18 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) } EXPORT_SYMBOL_GPL(__cookie_v6_check); -struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, + struct sk_buff *skb) { - const struct tcphdr *th = tcp_hdr(skb); - struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_options_received tcp_opt; - struct tcp_sock *tp = tcp_sk(sk); - struct inet_request_sock *ireq; - struct net *net = sock_net(sk); - struct tcp_request_sock *treq; - struct request_sock *req; - struct dst_entry *dst; - struct sock *ret = sk; - int full_space, mss; - __u8 rcv_wscale; u32 tsoff = 0; - - if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || - !th->ack || th->rst) - goto out; + int mss; if (tcp_synq_no_recent_overflow(sk)) goto out; - mss = __cookie_v6_check(ipv6_hdr(skb), th); - if (mss == 0) { + mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb)); + if (!mss) { __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED); goto out; } @@ -172,14 +159,37 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (!cookie_timestamp_decode(net, &tcp_opt)) goto out; - req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb); + return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb, + &tcp_opt, mss, tsoff); +out: + return ERR_PTR(-EINVAL); +} + +struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) +{ + const struct tcphdr *th = tcp_hdr(skb); + struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_request_sock *ireq; + struct net *net = sock_net(sk); + struct request_sock *req; + struct dst_entry *dst; + struct sock *ret = sk; + __u8 rcv_wscale; + int full_space; + + if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) || + !th->ack || th->rst) + goto out; + + req = cookie_tcp_check(net, sk, skb); + if (IS_ERR(req)) + goto out; if (!req) goto out_drop; ireq = inet_rsk(req); - treq = tcp_rsk(req); - req->mss = mss; ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; @@ -198,13 +208,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); - ireq->snd_wscale = tcp_opt.snd_wscale; - ireq->sack_ok = tcp_opt.sack_ok; - ireq->wscale_ok = tcp_opt.wscale_ok; - ireq->tstamp_ok = tcp_opt.saw_tstamp; - req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->ts_off = tsoff; - tcp_ao_syncookie(sk, skb, req, AF_INET6); /* @@ -245,7 +248,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; - ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, net, dst); + ireq->ecn_ok &= cookie_ecn_ok(net, dst); ret = tcp_get_cookie_sock(sk, skb, req, dst); out: