From 4386b921857793440ebd4db3d6b70639149c7074 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Fri, 24 Feb 2023 10:52:51 +0100 Subject: [PATCH 1/9] netfilter: bridge: introduce broute meta statement nftables equivalent for ebtables -t broute. Implement broute meta statement to set br_netfilter_broute flag in skb to force a packet to be routed instead of being bridged. Signed-off-by: Sriram Yagnaraman Signed-off-by: Florian Westphal --- include/uapi/linux/netfilter/nf_tables.h | 2 + net/bridge/netfilter/nft_meta_bridge.c | 71 +++++++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index ff677f3a6cadb..9c6f02c26054a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -931,6 +931,7 @@ enum nft_exthdr_attributes { * @NFT_META_TIME_HOUR: hour of day (in seconds) * @NFT_META_SDIF: slave device interface index * @NFT_META_SDIFNAME: slave device interface name + * @NFT_META_BRI_BROUTE: packet br_netfilter_broute bit */ enum nft_meta_keys { NFT_META_LEN, @@ -969,6 +970,7 @@ enum nft_meta_keys { NFT_META_TIME_HOUR, NFT_META_SDIF, NFT_META_SDIFNAME, + NFT_META_BRI_BROUTE, __NFT_META_IIFTYPE, }; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index c3ecd77e25cb8..bd4d1b4d745f6 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -8,6 +8,9 @@ #include #include #include +#include /* NF_BR_PRE_ROUTING */ + +#include "../br_private.h" static const struct net_device * nft_meta_get_bridge(const struct net_device *dev) @@ -102,6 +105,50 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = { .reduce = nft_meta_get_reduce, }; +static void nft_meta_bridge_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_meta *meta = nft_expr_priv(expr); + u32 *sreg = ®s->data[meta->sreg]; + struct sk_buff *skb = pkt->skb; + u8 value8; + + switch (meta->key) { + case NFT_META_BRI_BROUTE: + value8 = nft_reg_load8(sreg); + BR_INPUT_SKB_CB(skb)->br_netfilter_broute = !!value8; + break; + default: + nft_meta_set_eval(expr, regs, pkt); + } +} + +static int nft_meta_bridge_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; + int err; + + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); + switch (priv->key) { + case NFT_META_BRI_BROUTE: + len = sizeof(u8); + break; + default: + return nft_meta_set_init(ctx, expr, tb); + } + + priv->len = len; + err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len); + if (err < 0) + return err; + + return 0; +} + static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { @@ -120,15 +167,33 @@ static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, return false; } +static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + struct nft_meta *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->key) { + case NFT_META_BRI_BROUTE: + hooks = 1 << NF_BR_PRE_ROUTING; + break; + default: + return nft_meta_set_validate(ctx, expr, data); + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + static const struct nft_expr_ops nft_meta_bridge_set_ops = { .type = &nft_meta_bridge_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), - .eval = nft_meta_set_eval, - .init = nft_meta_set_init, + .eval = nft_meta_bridge_set_eval, + .init = nft_meta_bridge_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, .reduce = nft_meta_bridge_set_reduce, - .validate = nft_meta_set_validate, + .validate = nft_meta_bridge_set_validate, }; static const struct nft_expr_ops * From 9ccff83b1322f95da7a74784cf6f47a481e03dc5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:27 -0500 Subject: [PATCH 2/9] netfilter: bridge: call pskb_may_pull in br_nf_check_hbh_len When checking Hop-by-hop option header, if the option data is in nonlinear area, it should do pskb_may_pull instead of discarding the skb as a bad IPv6 packet. Signed-off-by: Xin Long Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_ipv6.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 6b07f30675bb0..afd1c718b683a 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -45,14 +45,18 @@ */ static int br_nf_check_hbh_len(struct sk_buff *skb) { - unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); + int len, off = sizeof(struct ipv6hdr); + unsigned char *nh; u32 pkt_len; - const unsigned char *nh = skb_network_header(skb); - int off = raw - nh; - int len = (raw[1] + 1) << 3; - if ((raw + len) - skb->data > skb_headlen(skb)) + if (!pskb_may_pull(skb, off + 8)) goto bad; + nh = (unsigned char *)(ipv6_hdr(skb) + 1); + len = (nh[1] + 1) << 3; + + if (!pskb_may_pull(skb, off + len)) + goto bad; + nh = skb_network_header(skb); off += 2; len -= 2; From a7f1a2f43e683c8ffca691d45f2cb32c052158fa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:28 -0500 Subject: [PATCH 3/9] netfilter: bridge: check len before accessing more nh data In the while loop of br_nf_check_hbh_len(), similar to ip6_parse_tlv(), before accessing 'nh[off + 1]', it should add a check 'len < 2'; and before parsing IPV6_TLV_JUMBO, it should add a check 'optlen > len', in case of overflows. Signed-off-by: Xin Long Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_ipv6.c | 45 +++++++++++++++------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index afd1c718b683a..8be3c5c8b925d 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -50,54 +50,49 @@ static int br_nf_check_hbh_len(struct sk_buff *skb) u32 pkt_len; if (!pskb_may_pull(skb, off + 8)) - goto bad; + return -1; nh = (unsigned char *)(ipv6_hdr(skb) + 1); len = (nh[1] + 1) << 3; if (!pskb_may_pull(skb, off + len)) - goto bad; + return -1; nh = skb_network_header(skb); off += 2; len -= 2; - while (len > 0) { - int optlen = nh[off + 1] + 2; - - switch (nh[off]) { - case IPV6_TLV_PAD1: - optlen = 1; - break; + int optlen; - case IPV6_TLV_PADN: - break; + if (nh[off] == IPV6_TLV_PAD1) { + off++; + len--; + continue; + } + if (len < 2) + return -1; + optlen = nh[off + 1] + 2; + if (optlen > len) + return -1; - case IPV6_TLV_JUMBO: + if (nh[off] == IPV6_TLV_JUMBO) { if (nh[off + 1] != 4 || (off & 3) != 2) - goto bad; + return -1; pkt_len = ntohl(*(__be32 *)(nh + off + 2)); if (pkt_len <= IPV6_MAXPLEN || ipv6_hdr(skb)->payload_len) - goto bad; + return -1; if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - goto bad; + return -1; if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) - goto bad; + return -1; nh = skb_network_header(skb); - break; - default: - if (optlen > len) - goto bad; - break; } off += optlen; len -= optlen; } - if (len == 0) - return 0; -bad: - return -1; + + return len ? -1 : 0; } int br_validate_ipv6(struct net *net, struct sk_buff *skb) From 0b24bd71a6c0214aaa2115302dd6598b89d2fa8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:29 -0500 Subject: [PATCH 4/9] netfilter: bridge: move pskb_trim_rcsum out of br_nf_check_hbh_len br_nf_check_hbh_len() is a function to check the Hop-by-hop option header, and shouldn't do pskb_trim_rcsum() there. This patch is to pass pkt_len out to br_validate_ipv6() and do pskb_trim_rcsum() after calling br_validate_ipv6() instead. Signed-off-by: Xin Long Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/bridge/br_netfilter_ipv6.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 8be3c5c8b925d..a0d6dfb3e2556 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -43,11 +43,10 @@ /* We only check the length. A bridge shouldn't do any hop-by-hop stuff * anyway */ -static int br_nf_check_hbh_len(struct sk_buff *skb) +static int br_nf_check_hbh_len(struct sk_buff *skb, u32 *plen) { int len, off = sizeof(struct ipv6hdr); unsigned char *nh; - u32 pkt_len; if (!pskb_may_pull(skb, off + 8)) return -1; @@ -75,6 +74,8 @@ static int br_nf_check_hbh_len(struct sk_buff *skb) return -1; if (nh[off] == IPV6_TLV_JUMBO) { + u32 pkt_len; + if (nh[off + 1] != 4 || (off & 3) != 2) return -1; pkt_len = ntohl(*(__be32 *)(nh + off + 2)); @@ -83,10 +84,7 @@ static int br_nf_check_hbh_len(struct sk_buff *skb) return -1; if (pkt_len > skb->len - sizeof(struct ipv6hdr)) return -1; - if (pskb_trim_rcsum(skb, - pkt_len + sizeof(struct ipv6hdr))) - return -1; - nh = skb_network_header(skb); + *plen = pkt_len; } off += optlen; len -= optlen; @@ -114,22 +112,19 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) goto inhdr_error; pkt_len = ntohs(hdr->payload_len); + if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb, &pkt_len)) + goto drop; - if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { - if (pkt_len + ip6h_len > skb->len) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INTRUNCATEDPKTS); - goto drop; - } - if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - __IP6_INC_STATS(net, idev, - IPSTATS_MIB_INDISCARDS); - goto drop; - } - hdr = ipv6_hdr(skb); + if (pkt_len + ip6h_len > skb->len) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INTRUNCATEDPKTS); + goto drop; } - if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb)) + if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { + __IP6_INC_STATS(net, idev, + IPSTATS_MIB_INDISCARDS); goto drop; + } memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); /* No IP options in IPv6 header; however it should be From 28e144cf5f72ce1c304571bc448e37c27495903a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:30 -0500 Subject: [PATCH 5/9] netfilter: move br_nf_check_hbh_len to utils Rename br_nf_check_hbh_len() to nf_ip6_check_hbh_len() and move it to netfilter utils, so that it can be used by other modules, like ovs and tc. Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- include/linux/netfilter_ipv6.h | 2 ++ net/bridge/br_netfilter_ipv6.c | 55 +--------------------------------- net/netfilter/utils.c | 52 ++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 54 deletions(-) diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 48314ade1506f..7834c0be2831d 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -197,6 +197,8 @@ static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen); + int ipv6_netfilter_init(void); void ipv6_netfilter_fini(void); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index a0d6dfb3e2556..550039dfc31a9 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -40,59 +40,6 @@ #include #endif -/* We only check the length. A bridge shouldn't do any hop-by-hop stuff - * anyway - */ -static int br_nf_check_hbh_len(struct sk_buff *skb, u32 *plen) -{ - int len, off = sizeof(struct ipv6hdr); - unsigned char *nh; - - if (!pskb_may_pull(skb, off + 8)) - return -1; - nh = (unsigned char *)(ipv6_hdr(skb) + 1); - len = (nh[1] + 1) << 3; - - if (!pskb_may_pull(skb, off + len)) - return -1; - nh = skb_network_header(skb); - - off += 2; - len -= 2; - while (len > 0) { - int optlen; - - if (nh[off] == IPV6_TLV_PAD1) { - off++; - len--; - continue; - } - if (len < 2) - return -1; - optlen = nh[off + 1] + 2; - if (optlen > len) - return -1; - - if (nh[off] == IPV6_TLV_JUMBO) { - u32 pkt_len; - - if (nh[off + 1] != 4 || (off & 3) != 2) - return -1; - pkt_len = ntohl(*(__be32 *)(nh + off + 2)); - if (pkt_len <= IPV6_MAXPLEN || - ipv6_hdr(skb)->payload_len) - return -1; - if (pkt_len > skb->len - sizeof(struct ipv6hdr)) - return -1; - *plen = pkt_len; - } - off += optlen; - len -= optlen; - } - - return len ? -1 : 0; -} - int br_validate_ipv6(struct net *net, struct sk_buff *skb) { const struct ipv6hdr *hdr; @@ -112,7 +59,7 @@ int br_validate_ipv6(struct net *net, struct sk_buff *skb) goto inhdr_error; pkt_len = ntohs(hdr->payload_len); - if (hdr->nexthdr == NEXTHDR_HOP && br_nf_check_hbh_len(skb, &pkt_len)) + if (hdr->nexthdr == NEXTHDR_HOP && nf_ip6_check_hbh_len(skb, &pkt_len)) goto drop; if (pkt_len + ip6h_len > skb->len) { diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 2182d361e273f..acef4155f0daf 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -215,3 +215,55 @@ int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) } return ret; } + +/* Only get and check the lengths, not do any hop-by-hop stuff. */ +int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen) +{ + int len, off = sizeof(struct ipv6hdr); + unsigned char *nh; + + if (!pskb_may_pull(skb, off + 8)) + return -ENOMEM; + nh = (unsigned char *)(ipv6_hdr(skb) + 1); + len = (nh[1] + 1) << 3; + + if (!pskb_may_pull(skb, off + len)) + return -ENOMEM; + nh = skb_network_header(skb); + + off += 2; + len -= 2; + while (len > 0) { + int optlen; + + if (nh[off] == IPV6_TLV_PAD1) { + off++; + len--; + continue; + } + if (len < 2) + return -EBADMSG; + optlen = nh[off + 1] + 2; + if (optlen > len) + return -EBADMSG; + + if (nh[off] == IPV6_TLV_JUMBO) { + u32 pkt_len; + + if (nh[off + 1] != 4 || (off & 3) != 2) + return -EBADMSG; + pkt_len = ntohl(*(__be32 *)(nh + off + 2)); + if (pkt_len <= IPV6_MAXPLEN || + ipv6_hdr(skb)->payload_len) + return -EBADMSG; + if (pkt_len > skb->len - sizeof(struct ipv6hdr)) + return -EBADMSG; + *plen = pkt_len; + } + off += optlen; + len -= optlen; + } + + return len ? -EBADMSG : 0; +} +EXPORT_SYMBOL_GPL(nf_ip6_check_hbh_len); From eaafdaa3e92234b877b645431957549a1f87e2bf Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:31 -0500 Subject: [PATCH 6/9] netfilter: use nf_ip6_check_hbh_len in nf_ct_skb_network_trim For IPv6 Jumbo packets, the ipv6_hdr(skb)->payload_len is always 0, and its real payload_len ( > 65535) is saved in hbh exthdr. With 0 length for the jumbo packets, all data and exthdr will be trimmed in nf_ct_skb_network_trim(). This patch is to call nf_ip6_check_hbh_len() to get real pkt_len of the IPv6 packet, similar to br_validate_ipv6(). Signed-off-by: Xin Long Reviewed-by: Simon Horman Reviewed-by: Nikolay Aleksandrov Reviewed-by: Aaron Conole Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_ovs.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c index 52b776bdf526d..068e9489e1c27 100644 --- a/net/netfilter/nf_conntrack_ovs.c +++ b/net/netfilter/nf_conntrack_ovs.c @@ -6,6 +6,7 @@ #include #include #include +#include /* 'skb' should already be pulled to nh_ofs. */ int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, @@ -120,8 +121,14 @@ int nf_ct_skb_network_trim(struct sk_buff *skb, int family) len = skb_ip_totlen(skb); break; case NFPROTO_IPV6: - len = sizeof(struct ipv6hdr) - + ntohs(ipv6_hdr(skb)->payload_len); + len = ntohs(ipv6_hdr(skb)->payload_len); + if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) { + int err = nf_ip6_check_hbh_len(skb, &len); + + if (err) + return err; + } + len += sizeof(struct ipv6hdr); break; default: len = skb->len; From 6bb382bcf742de5c17209848325653059c995a04 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 7 Mar 2023 16:31:32 -0500 Subject: [PATCH 7/9] selftests: add a selftest for big tcp This test runs on the client-router-server topo, and monitors the traffic on the RX devices of router and server while sending BIG TCP packets with netperf from client to server. Meanwhile, it changes 'tso' on the TX devs and 'gro' on the RX devs. Then it checks if any BIG TCP packets appears on the RX devs with 'ip/ip6tables -m length ! --length 0:65535' for each case. Note that we also add tc action ct in link1 ingress to cover the ipv6 jumbo packets process in nf_ct_skb_network_trim() of nf_conntrack_ovs. Signed-off-by: Xin Long Reviewed-by: Aaron Conole Reviewed-by: Nikolay Aleksandrov Signed-off-by: Florian Westphal --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/big_tcp.sh | 180 +++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100755 tools/testing/selftests/net/big_tcp.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 6cd8993454d7e..099741290184e 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -48,6 +48,7 @@ TEST_PROGS += l2_tos_ttl_inherit.sh TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh +TEST_PROGS += big_tcp.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh new file mode 100755 index 0000000000000..cde9a91c47971 --- /dev/null +++ b/tools/testing/selftests/net/big_tcp.sh @@ -0,0 +1,180 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For IPv4 and IPv6 BIG TCP. +# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="198.51.100.1" +CLIENT_IP6="2001:db8:1::1" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="203.0.113.1" +SERVER_IP6="2001:db8:2::1" + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +SERVER_GW4="203.0.113.2" +CLIENT_GW4="198.51.100.2" +SERVER_GW6="2001:db8:2::2" +CLIENT_GW6="2001:db8:1::2" + +MAX_SIZE=128000 +CHK_SIZE=65535 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +setup() { + ip netns add $CLIENT_NS + ip netns add $SERVER_NS + ip netns add $ROUTER_NS + ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS + ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS + + ip -net $CLIENT_NS link set link0 up + ip -net $CLIENT_NS link set link0 mtu 1442 + ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0 + ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad + ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4 + ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6 + ip -net $CLIENT_NS link set dev link0 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $CLIENT_NS link set dev link0 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + + ip -net $ROUTER_NS link set link1 up + ip -net $ROUTER_NS link set link2 up + ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1 + ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad + ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2 + ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad + ip -net $ROUTER_NS link set dev link1 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link1 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip -net $ROUTER_NS link set dev link2 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + # test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action. + ip net exec $ROUTER_NS tc qdisc add dev link1 ingress + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ip flower ip_proto tcp action ct + ip net exec $ROUTER_NS tc filter add dev link1 ingress \ + proto ipv6 flower ip_proto tcp action ct + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip -net $SERVER_NS link set link3 up + ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3 + ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad + ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4 + ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6 + ip -net $SERVER_NS link set dev link3 \ + gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE + ip -net $SERVER_NS link set dev link3 \ + gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE + ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10 + ip net exec $SERVER_NS netserver 2>&1 >/dev/null +} + +cleanup() { + ip net exec $SERVER_NS pkill netserver + ip -net $ROUTER_NS link del link1 + ip -net $ROUTER_NS link del link2 + ip netns del "$CLIENT_NS" + ip netns del "$SERVER_NS" + ip netns del "$ROUTER_NS" +} + +start_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +check_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0" +} + +stop_counter() { + local ipt="iptables" + local iface=$1 + local netns=$2 + + [ "$NF" = "6" ] && ipt="ip6tables" + ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \ + -m length ! --length 0:$CHK_SIZE -j ACCEPT +} + +do_netperf() { + local serip=$SERVER_IP4 + local netns=$1 + + [ "$NF" = "6" ] && serip=$SERVER_IP6 + ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null +} + +do_test() { + local cli_tso=$1 + local gw_gro=$2 + local gw_tso=$3 + local ser_gro=$4 + local ret="PASS" + + ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso + ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro + ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso + ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro + + start_counter link1 $ROUTER_NS + start_counter link3 $SERVER_NS + do_netperf $CLIENT_NS + + if check_counter link1 $ROUTER_NS; then + check_counter link3 $SERVER_NS || ret="FAIL_on_link3" + else + ret="FAIL_on_link1" + fi + + stop_counter link1 $ROUTER_NS + stop_counter link3 $SERVER_NS + printf "%-9s %-8s %-8s %-8s: [%s]\n" \ + $cli_tso $gw_gro $gw_tso $ser_gro $ret + test $ret = "PASS" +} + +testup() { + echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \ + do_test "on" "on" "on" "on" && \ + do_test "on" "off" "on" "off" && \ + do_test "off" "on" "on" "on" && \ + do_test "on" "on" "off" "on" && \ + do_test "off" "on" "off" "on" +} + +if ! netperf -V &> /dev/null; then + echo "SKIP: Could not run test without netperf tool" + exit $ksft_skip +fi + +if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then + echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link" + exit $ksft_skip +fi + +trap cleanup EXIT +setup && echo "Testing for BIG TCP:" && \ +NF=4 testup && echo "***v4 Tests Done***" && \ +NF=6 testup && echo "***v6 Tests Done***" +exit $? From e5d015a114dad6a50c22cf282007f7d92086df46 Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:30:48 +0000 Subject: [PATCH 8/9] netfilter: conntrack: fix typo There's a spelling mistake in a comment. Fix it. Signed-off-by: Jeremy Sowden Signed-off-by: Florian Westphal --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7250082e7de56..004c54132a3b5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1294,7 +1294,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) } EXPORT_SYMBOL_GPL(__nf_conntrack_confirm); -/* Returns true if a connection correspondings to the tuple (required +/* Returns true if a connection corresponds to the tuple (required for NAT). */ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, From b0ca200077b3872056e6a8291c9a50f803658c2a Mon Sep 17 00:00:00 2001 From: Jeremy Sowden Date: Tue, 7 Mar 2023 23:30:49 +0000 Subject: [PATCH 9/9] netfilter: nat: fix indentation of function arguments A couple of arguments to a function call are incorrectly indented. Fix them. Signed-off-by: Jeremy Sowden Signed-off-by: Florian Westphal --- net/netfilter/nf_nat_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index e29e4ccb5c5a3..ce829d434f138 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -549,8 +549,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) && l4proto_in_range(tuple, maniptype, - &range->min_proto, - &range->max_proto) && + &range->min_proto, + &range->max_proto) && (range->min_proto.all == range->max_proto.all || !nf_nat_used_tuple(tuple, ct))) return;