From 77b337196a9d87f3d6bb9b07c0436ecafbffda1e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Jan 2022 13:13:32 +0100 Subject: [PATCH 1/6] netfilter: conntrack: don't refresh sctp entries in closed state Vivek Thrivikraman reported: An SCTP server application which is accessed continuously by client application. When the session disconnects the client retries to establish a connection. After restart of SCTP server application the session is not established because of stale conntrack entry with connection state CLOSED as below. (removing this entry manually established new connection): sctp 9 CLOSED src=10.141.189.233 [..] [ASSURED] Just skip timeout update of closed entries, we don't want them to stay around forever. Reported-and-tested-by: Vivek Thrivikraman Closes: https://bugzilla.netfilter.org/show_bug.cgi?id=1579 Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_sctp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 2394238d01c91..5a936334b517a 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; + + /* don't renew timeout on init retransmit so + * port reuse by client or NAT middlebox cannot + * keep entry alive indefinitely (incl. nat info). + */ + if (new_state == SCTP_CONNTRACK_CLOSED && + old_state == SCTP_CONNTRACK_CLOSED && + nf_ct_is_confirmed(ct)) + ignore = true; } ct->proto.sctp.state = new_state; From a9e8503def0fd4ed89ade1f61c315f904581d439 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jan 2022 17:13:23 +0100 Subject: [PATCH 2/6] netfilter: nft_payload: don't allow th access for fragments Loads relative to ->thoff naturally expect that this points to the transport header, but this is only true if pkt->fragoff == 0. This has little effect for rulesets with connection tracking/nat because these enable ip defra. For other rulesets this prevents false matches. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 2 +- net/netfilter/nft_payload.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index dbe1f2e7dd9ed..9e927ab4df151 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt, { struct tcphdr *tcph; - if (pkt->tprot != IPPROTO_TCP) + if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) return NULL; tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 940fed9a760b3..5cc06aef43452 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -83,7 +83,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt) { unsigned int thoff = nft_thoff(pkt); - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) return -1; switch (pkt->tprot) { @@ -147,7 +147,7 @@ void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) goto err; offset = nft_thoff(pkt); break; @@ -688,7 +688,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) goto err; offset = nft_thoff(pkt); break; @@ -728,7 +728,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr, if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP && pkt->tprot == IPPROTO_SCTP && skb->ip_summed != CHECKSUM_PARTIAL) { - if (nft_payload_csum_sctp(skb, nft_thoff(pkt))) + if (pkt->fragoff == 0 && + nft_payload_csum_sctp(skb, nft_thoff(pkt))) goto err; } From cc4f9d62037ebcb811f4908bba2986c01df1bd50 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jan 2022 17:47:00 +0100 Subject: [PATCH 3/6] netfilter: conntrack: move synack init code to helper It seems more readable to use a common helper in the followup fix rather than copypaste or goto. No functional change intended. The function is only called for syn-ack or syn in repy direction in case of simultaneous open. Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 47 ++++++++++++++++---------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index af5115e127cfd..88c89e97d8a25 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -446,6 +446,32 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } +static void tcp_init_sender(struct ip_ct_tcp_state *sender, + struct ip_ct_tcp_state *receiver, + const struct sk_buff *skb, + unsigned int dataoff, + const struct tcphdr *tcph, + u32 end, u32 win) +{ + /* SYN-ACK in reply to a SYN + * or SYN from reply direction in simultaneous open. + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(skb, dataoff, tcph, sender); + /* RFC 1323: + * Both sides must send the Window Scale option + * to enable window scaling in either direction. + */ + if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { + sender->td_scale = 0; + receiver->td_scale = 0; + } +} + static bool tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, unsigned int index, @@ -499,24 +525,9 @@ static bool tcp_in_window(struct nf_conn *ct, * Initialize sender data. */ if (tcph->syn) { - /* - * SYN-ACK in reply to a SYN - * or SYN from reply direction in simultaneous open. - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, dataoff, tcph, sender); - /* - * RFC 1323: - * Both sides must send the Window Scale option - * to enable window scaling in either direction. - */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE - && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) - sender->td_scale = - receiver->td_scale = 0; + tcp_init_sender(sender, receiver, + skb, dataoff, tcph, + end, win); if (!tcph->ack) /* Simultaneous open */ return true; From 82b72cb94666b3dbd7152bb9f441b068af7a921b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jan 2022 17:47:01 +0100 Subject: [PATCH 4/6] netfilter: conntrack: re-init state for retransmitted syn-ack TCP conntrack assumes that a syn-ack retransmit is identical to the previous syn-ack. This isn't correct and causes stuck 3whs in some more esoteric scenarios. tcpdump to illustrate the problem: client > server: Flags [S] seq 1365731894, win 29200, [mss 1460,sackOK,TS val 2083035583 ecr 0,wscale 7] server > client: Flags [S.] seq 145824453, ack 643160523, win 65535, [mss 8952,wscale 5,TS val 3215367629 ecr 2082921663] Note the invalid/outdated synack ack number. Conntrack marks this syn-ack as out-of-window/invalid, but it did initialize the reply direction parameters based on this packets content. client > server: Flags [S] seq 1365731894, win 29200, [mss 1460,sackOK,TS val 2083036623 ecr 0,wscale 7] ... retransmit... server > client: Flags [S.], seq 145824453, ack 643160523, win 65535, [mss 8952,wscale 5,TS val 3215368644 ecr 2082921663] and another bogus synack. This repeats, then client re-uses for a new attempt: client > server: Flags [S], seq 2375731741, win 29200, [mss 1460,sackOK,TS val 2083100223 ecr 0,wscale 7] server > client: Flags [S.], seq 145824453, ack 643160523, win 65535, [mss 8952,wscale 5,TS val 3215430754 ecr 2082921663] ... but still gets a invalid syn-ack. This repeats until: server > client: Flags [S.], seq 145824453, ack 643160523, win 65535, [mss 8952,wscale 5,TS val 3215437785 ecr 2082921663] server > client: Flags [R.], seq 145824454, ack 643160523, win 65535, [mss 8952,wscale 5,TS val 3215443451 ecr 2082921663] client > server: Flags [S], seq 2375731741, win 29200, [mss 1460,sackOK,TS val 2083115583 ecr 0,wscale 7] server > client: Flags [S.], seq 162602410, ack 2375731742, win 65535, [mss 8952,wscale 5,TS val 3215445754 ecr 2083115583] This syn-ack has the correct ack number, but conntrack flags it as invalid: The internal state was created from the first syn-ack seen so the sequence number of the syn-ack is treated as being outside of the announced window. Don't assume that retransmitted syn-ack is identical to previous one. Treat it like the first syn-ack and reinit state. Signed-off-by: Florian Westphal Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 88c89e97d8a25..d1582b888c0d8 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -571,6 +571,18 @@ static bool tcp_in_window(struct nf_conn *ct, sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); + } else if (tcph->syn && dir == IP_CT_DIR_REPLY && + state->state == TCP_CONNTRACK_SYN_SENT) { + /* Retransmitted syn-ack, or syn (simultaneous open). + * + * Re-init state for this direction, just like for the first + * syn(-ack) reply, it might differ in seq, ack or tcp options. + */ + tcp_init_sender(sender, receiver, + skb, dataoff, tcph, + end, win); + if (!tcph->ack) + return true; } if (!(tcph->ack)) { From 1f6339e034d5780ad7097c8d8c11b26e0762afba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 29 Jan 2022 18:30:18 +0100 Subject: [PATCH 5/6] MAINTAINERS: netfilter: update git links nf and nf-next have a new location. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bb83dcbcb619b..3a9cb567d47cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13297,8 +13297,8 @@ W: http://www.iptables.org/ W: http://www.nftables.org/ Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/ C: irc://irc.libera.chat/netfilter -T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next.git F: include/linux/netfilter* F: include/linux/netfilter/ F: include/net/netfilter/ From d1ca60efc53d665cf89ed847a14a510a81770b81 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 2 Feb 2022 12:00:56 +0100 Subject: [PATCH 6/6] netfilter: ctnetlink: disable helper autoassign When userspace, e.g. conntrackd, inserts an entry with a specified helper, its possible that the helper is lost immediately after its added: ctnetlink_create_conntrack -> nf_ct_helper_ext_add + assign helper -> ctnetlink_setup_nat -> ctnetlink_parse_nat_setup -> parse_nat_setup -> nfnetlink_parse_nat_setup -> nf_nat_setup_info -> nf_conntrack_alter_reply -> __nf_ct_try_assign_helper ... and __nf_ct_try_assign_helper will zero the helper again. Set IPS_HELPER bit to bypass auto-assign logic, its unwanted, just like when helper is assigned via ruleset. Dropped old 'not strictly necessary' comment, it referred to use of rcu_assign_pointer() before it got replaced by RCU_INIT_POINTER(). NB: Fixes tag intentionally incorrect, this extends the referenced commit, but this change won't build without IPS_HELPER introduced there. Fixes: 6714cf5465d280 ("netfilter: nf_conntrack: fix explicit helper attachment and NAT") Reported-by: Pham Thanh Tuyen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- net/netfilter/nf_conntrack_netlink.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15c..26071021e986f 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ac438370f94a9..7032402ffd331 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2311,7 +2311,8 @@ ctnetlink_create_conntrack(struct net *net, if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); - /* not in hash table yet so not strictly necessary */ + /* disable helper auto-assignment for this entry */ + ct->status |= IPS_HELPER; RCU_INIT_POINTER(help->helper, helper); } } else {