From 2ed37183abb70233146dc82f19cb1cbceed2b505 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Wed, 3 Mar 2021 14:59:53 +0200 Subject: [PATCH 01/10] netfilter: flowtable: separate replace, destroy and stats to different workqueues Currently the flow table offload replace, destroy and stats work items are executed on a single workqueue. As such, DESTROY and STATS commands may be backlogged after a burst of REPLACE work items. This scenario can bloat up memory and may cause active connections to age out. Instantiate add, del and stats workqueues to avoid backlogs of non-dependent actions. Provide sysfs control over the workqueue attributes, allowing userspace applications to control the workqueue cpumask. Signed-off-by: Oz Shlomo Reviewed-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 44 ++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2a6993fa40d78..1b979c8b3ba0b 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -13,7 +13,9 @@ #include #include -static struct workqueue_struct *nf_flow_offload_wq; +static struct workqueue_struct *nf_flow_offload_add_wq; +static struct workqueue_struct *nf_flow_offload_del_wq; +static struct workqueue_struct *nf_flow_offload_stats_wq; struct flow_offload_work { struct list_head list; @@ -826,7 +828,12 @@ static void flow_offload_work_handler(struct work_struct *work) static void flow_offload_queue_work(struct flow_offload_work *offload) { - queue_work(nf_flow_offload_wq, &offload->work); + if (offload->cmd == FLOW_CLS_REPLACE) + queue_work(nf_flow_offload_add_wq, &offload->work); + else if (offload->cmd == FLOW_CLS_DESTROY) + queue_work(nf_flow_offload_del_wq, &offload->work); + else + queue_work(nf_flow_offload_stats_wq, &offload->work); } static struct flow_offload_work * @@ -898,8 +905,11 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable, void nf_flow_table_offload_flush(struct nf_flowtable *flowtable) { - if (nf_flowtable_hw_offload(flowtable)) - flush_workqueue(nf_flow_offload_wq); + if (nf_flowtable_hw_offload(flowtable)) { + flush_workqueue(nf_flow_offload_add_wq); + flush_workqueue(nf_flow_offload_del_wq); + flush_workqueue(nf_flow_offload_stats_wq); + } } static int nf_flow_table_block_setup(struct nf_flowtable *flowtable, @@ -1011,15 +1021,33 @@ EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup); int nf_flow_table_offload_init(void) { - nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload", - WQ_UNBOUND, 0); - if (!nf_flow_offload_wq) + nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add", + WQ_UNBOUND | WQ_SYSFS, 0); + if (!nf_flow_offload_add_wq) return -ENOMEM; + nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del", + WQ_UNBOUND | WQ_SYSFS, 0); + if (!nf_flow_offload_del_wq) + goto err_del_wq; + + nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats", + WQ_UNBOUND | WQ_SYSFS, 0); + if (!nf_flow_offload_stats_wq) + goto err_stats_wq; + return 0; + +err_stats_wq: + destroy_workqueue(nf_flow_offload_del_wq); +err_del_wq: + destroy_workqueue(nf_flow_offload_add_wq); + return -ENOMEM; } void nf_flow_table_offload_exit(void) { - destroy_workqueue(nf_flow_offload_wq); + destroy_workqueue(nf_flow_offload_add_wq); + destroy_workqueue(nf_flow_offload_del_wq); + destroy_workqueue(nf_flow_offload_stats_wq); } From c2168e6bd7ec50cedb69b3be1ba6146e28893c69 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Fri, 5 Mar 2021 02:42:09 -0600 Subject: [PATCH 02/10] netfilter: Fix fall-through warnings for Clang In preparation to enable -Wimplicit-fallthrough for Clang, fix multiple warnings by explicitly adding multiple break statements instead of just letting the code fall through to the next case. Link: https://github.com/KSPP/linux/issues/115 Acked-by: Florian Westphal Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_dccp.c | 1 + net/netfilter/nf_tables_api.c | 1 + net/netfilter/nft_ct.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index db7479db85121..4f33307fa3cf4 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -397,6 +397,7 @@ dccp_new(struct nf_conn *ct, const struct sk_buff *skb, msg = "not picking up existing connection "; goto out_invalid; } + break; case CT_DCCP_REQUEST: break; case CT_DCCP_INVALID: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 224c8e537cb33..083c112bee0b8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8557,6 +8557,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, data->verdict.chain); if (err < 0) return err; + break; default: break; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 882fe8648653d..0592a94560843 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -527,6 +527,7 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) case NFT_CT_ZONE: if (--nft_ct_pcpu_template_refcnt == 0) nft_ct_tmpl_put_pcpu(); + break; #endif default: break; From d4a96be65423296e42091b0b79973b8d446e7798 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 11 Mar 2021 13:55:59 +0800 Subject: [PATCH 03/10] netfilter: conntrack: Remove unused variable declaration commit e97c3e278e95 ("tproxy: split off ipv6 defragmentation to a separate module") left behind this. Signed-off-by: YueHaibing Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index 7b3c873f88396..e95483192d1bc 100644 --- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -4,7 +4,4 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6; -#include -extern struct ctl_table nf_ct_ipv6_sysctl_table[]; - #endif /* _NF_CONNTRACK_IPV6_H*/ From 2fc11745c3ffa324643c1e6d8cf8f5273d9f9571 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Mar 2021 15:53:09 +0100 Subject: [PATCH 04/10] netfilter: flowtable: consolidate skb_try_make_writable() call Fetch the layer 4 header size to be mangled by NAT when building the tuple, then use it to make writable the network and the transport headers. After this update, the NAT routines now assumes that the skbuff area is writable. Do the pointer refetch only after the single skb_try_make_writable() call. 
Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 12 ----- net/netfilter/nf_flow_table_ip.c | 74 +++++++++++++----------------- 2 files changed, 31 insertions(+), 55 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 5fa657b8e03df..ff5d94b644acc 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -395,9 +395,6 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, { struct tcphdr *tcph; - if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) - return -1; - tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); @@ -409,9 +406,6 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, { struct udphdr *udph; - if (skb_try_make_writable(skb, thoff + sizeof(*udph))) - return -1; - udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, @@ -447,9 +441,6 @@ int nf_flow_snat_port(const struct flow_offload *flow, struct flow_ports *hdr; __be16 port, new_port; - if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) - return -1; - hdr = (void *)(skb_network_header(skb) + thoff); switch (dir) { @@ -478,9 +469,6 @@ int nf_flow_dnat_port(const struct flow_offload *flow, struct flow_ports *hdr; __be16 port, new_port; - if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) - return -1; - hdr = (void *)(skb_network_header(skb) + thoff); switch (dir) { diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index a698dbe28ef5a..2b8ee5dcef64f 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -39,9 +39,6 @@ static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, { struct tcphdr *tcph; - if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) - return -1; - tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); @@ -53,9 +50,6 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, { struct udphdr *udph; - if (skb_try_make_writable(skb, thoff + sizeof(*udph))) - return -1; - udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace4(&udph->check, skb, addr, @@ -136,19 +130,17 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, } static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, - unsigned int thoff, enum flow_offload_tuple_dir dir) + unsigned int thoff, enum flow_offload_tuple_dir dir, + struct iphdr *iph) { - struct iphdr *iph = ip_hdr(skb); - if (test_bit(NF_FLOW_SNAT, &flow->flags) && (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || - nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0)) + nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) return -1; - iph = ip_hdr(skb); if (test_bit(NF_FLOW_DNAT, &flow->flags) && (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || - nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0)) + nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) return -1; return 0; @@ -160,10 +152,10 @@ static bool ip_has_options(unsigned int thoff) } static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, - struct flow_offload_tuple *tuple) + struct flow_offload_tuple *tuple, u32 *hdrsize) { - unsigned int thoff, hdrsize; struct flow_ports 
*ports; + unsigned int thoff; struct iphdr *iph; if (!pskb_may_pull(skb, sizeof(*iph))) @@ -178,10 +170,10 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, switch (iph->protocol) { case IPPROTO_TCP: - hdrsize = sizeof(struct tcphdr); + *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: - hdrsize = sizeof(struct udphdr); + *hdrsize = sizeof(struct udphdr); break; default: return -1; @@ -191,7 +183,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, return -1; thoff = iph->ihl * 4; - if (!pskb_may_pull(skb, thoff + hdrsize)) + if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; iph = ip_hdr(skb); @@ -252,11 +244,12 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, unsigned int thoff; struct iphdr *iph; __be32 nexthop; + u32 hdrsize; if (skb->protocol != htons(ETH_P_IP)) return NF_ACCEPT; - if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0) + if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize) < 0) return NF_ACCEPT; tuplehash = flow_offload_lookup(flow_table, &tuple); @@ -271,11 +264,13 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) return NF_ACCEPT; - if (skb_try_make_writable(skb, sizeof(*iph))) + iph = ip_hdr(skb); + thoff = iph->ihl * 4; + if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; - thoff = ip_hdr(skb)->ihl * 4; - if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff)) + iph = ip_hdr(skb); + if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return NF_ACCEPT; flow_offload_refresh(flow_table, flow); @@ -285,10 +280,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; } - if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0) + if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0) return NF_DROP; - iph = ip_hdr(skb); ip_decrease_ttl(iph); skb->tstamp = 0; @@ -317,9 +311,6 @@ static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, { struct tcphdr *tcph; - if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) - return -1; - tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, new_addr->s6_addr32, true); @@ -333,9 +324,6 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, { struct udphdr *udph; - if (skb_try_make_writable(skb, thoff + sizeof(*udph))) - return -1; - udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32, @@ -417,31 +405,30 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow, static int nf_flow_nat_ipv6(const struct flow_offload *flow, struct sk_buff *skb, - enum flow_offload_tuple_dir dir) + enum flow_offload_tuple_dir dir, + struct ipv6hdr *ip6h) { - struct ipv6hdr *ip6h = ipv6_hdr(skb); unsigned int thoff = sizeof(*ip6h); if (test_bit(NF_FLOW_SNAT, &flow->flags) && (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || - nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0)) + nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) return -1; - ip6h = ipv6_hdr(skb); if (test_bit(NF_FLOW_DNAT, &flow->flags) && (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || - nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0)) + nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) return -1; return 0; } static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, - struct 
flow_offload_tuple *tuple, u32 *hdrsize) { - unsigned int thoff, hdrsize; struct flow_ports *ports; struct ipv6hdr *ip6h; + unsigned int thoff; if (!pskb_may_pull(skb, sizeof(*ip6h))) return -1; @@ -450,10 +437,10 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, switch (ip6h->nexthdr) { case IPPROTO_TCP: - hdrsize = sizeof(struct tcphdr); + *hdrsize = sizeof(struct tcphdr); break; case IPPROTO_UDP: - hdrsize = sizeof(struct udphdr); + *hdrsize = sizeof(struct udphdr); break; default: return -1; @@ -463,7 +450,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, return -1; thoff = sizeof(*ip6h); - if (!pskb_may_pull(skb, thoff + hdrsize)) + if (!pskb_may_pull(skb, thoff + *hdrsize)) return -1; ip6h = ipv6_hdr(skb); @@ -493,11 +480,12 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, struct net_device *outdev; struct ipv6hdr *ip6h; struct rt6_info *rt; + u32 hdrsize; if (skb->protocol != htons(ETH_P_IPV6)) return NF_ACCEPT; - if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) + if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize) < 0) return NF_ACCEPT; tuplehash = flow_offload_lookup(flow_table, &tuple); @@ -523,13 +511,13 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; } - if (skb_try_make_writable(skb, sizeof(*ip6h))) + if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize)) return NF_DROP; - if (nf_flow_nat_ipv6(flow, skb, dir) < 0) + ip6h = ipv6_hdr(skb); + if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0) return NF_DROP; - ip6h = ipv6_hdr(skb); ip6h->hop_limit--; skb->tstamp = 0; From 2babb46c8c825e5039bbf0c273d82df3210dd43b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Mar 2021 15:54:43 +0100 Subject: [PATCH 05/10] netfilter: flowtable: move skb_try_make_writable() before NAT in IPv4 Move the skb_try_make_writable() call for consistency with the IPv6 flowtable datapath and to make sure the skbuff is writable right before the NAT header updates. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 2b8ee5dcef64f..95adf74515ea9 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -266,10 +266,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, iph = ip_hdr(skb); thoff = iph->ihl * 4; - if (skb_try_make_writable(skb, thoff + hdrsize)) - return NF_DROP; - - iph = ip_hdr(skb); if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return NF_ACCEPT; @@ -280,6 +276,10 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_ACCEPT; } + if (skb_try_make_writable(skb, thoff + hdrsize)) + return NF_DROP; + + iph = ip_hdr(skb); if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0) return NF_DROP; From 4f08f173d08cad4664e447e580dc0c5aa6332db3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Mar 2021 15:55:11 +0100 Subject: [PATCH 06/10] netfilter: flowtable: move FLOW_OFFLOAD_DIR_MAX away from enumeration This allows removal of the default case, which should never happen and was only added to avoid gcc warnings about the unhandled FLOW_OFFLOAD_DIR_MAX enumeration case. 
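For illustration, with FLOW_OFFLOAD_DIR_MAX moved to a #define outside the enum, a switch on the direction covers every enumerator and needs no default arm (a sketch based on the hunks below):

	enum flow_offload_tuple_dir {
		FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
		FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
	};
	#define FLOW_OFFLOAD_DIR_MAX	IP_CT_DIR_MAX

	switch (dir) {
	case FLOW_OFFLOAD_DIR_ORIGINAL:
		/* ... */
		break;
	case FLOW_OFFLOAD_DIR_REPLY:
		/* ... */
		break;
	}	/* no default: both directions are handled */
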
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 2 +- net/netfilter/nf_flow_table_core.c | 4 ---- net/netfilter/nf_flow_table_ip.c | 8 -------- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 54c4d5c908a52..ce507251b3d88 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -86,8 +86,8 @@ static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable) enum flow_offload_tuple_dir { FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, - FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX }; +#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX struct flow_offload_tuple { union { diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index ff5d94b644acc..3bdbd962a0840 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -454,8 +454,6 @@ int nf_flow_snat_port(const struct flow_offload *flow, new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; hdr->dest = new_port; break; - default: - return -1; } return nf_flow_nat_port(skb, thoff, protocol, port, new_port); @@ -482,8 +480,6 @@ int nf_flow_dnat_port(const struct flow_offload *flow, new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; hdr->source = new_port; break; - default: - return -1; } return nf_flow_nat_port(skb, thoff, protocol, port, new_port); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 95adf74515ea9..0579e15c49688 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -96,8 +96,6 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr; iph->daddr = new_addr; break; - default: - return -1; } csum_replace4(&iph->check, addr, new_addr); @@ -121,8 +119,6 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr; iph->saddr = new_addr; break; - default: - return -1; } csum_replace4(&iph->check, addr, new_addr); @@ -371,8 +367,6 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow, new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6; ip6h->daddr = new_addr; break; - default: - return -1; } return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); @@ -396,8 +390,6 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow, new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6; ip6h->saddr = new_addr; break; - default: - return -1; } return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); From f4401262b927b84d2f1861e347627fa0d77d4eb7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Mar 2021 15:55:25 +0100 Subject: [PATCH 07/10] netfilter: flowtable: fast NAT functions never fail Simplify existing fast NAT routines by returning void. After the skb_try_make_writable() call consolidation, these routines cannot ever fail. 
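For illustration, the port NAT helpers drop their return value and the callers lose the error handling (a condensed sketch of the hunks below, not the literal patched code):

	void nf_flow_snat_port(const struct flow_offload *flow,
			       struct sk_buff *skb, unsigned int thoff,
			       u8 protocol, enum flow_offload_tuple_dir dir);

	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
	}
	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
	}
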
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 12 +-- net/netfilter/nf_flow_table_core.c | 41 +++---- net/netfilter/nf_flow_table_ip.c | 147 +++++++++++--------------- 3 files changed, 84 insertions(+), 116 deletions(-) diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index ce507251b3d88..fb165697c8a1f 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -229,12 +229,12 @@ void nf_flow_table_free(struct nf_flowtable *flow_table); void flow_offload_teardown(struct flow_offload *flow); -int nf_flow_snat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir); -int nf_flow_dnat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir); +void nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); +void nf_flow_dnat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir); struct flow_ports { __be16 source, dest; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 3bdbd962a0840..8ffd3f3c288c6 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -389,20 +389,17 @@ static void nf_flow_offload_work_gc(struct work_struct *work) queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } - -static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, - __be16 port, __be16 new_port) +static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) { struct tcphdr *tcph; tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); - - return 0; } -static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, - __be16 port, __be16 new_port) +static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) { struct udphdr *udph; @@ -413,30 +410,24 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, if (!udph->check) udph->check = CSUM_MANGLED_0; } - - return 0; } -static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, - u8 protocol, __be16 port, __be16 new_port) +static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, + u8 protocol, __be16 port, __be16 new_port) { switch (protocol) { case IPPROTO_TCP: - if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) - return NF_DROP; + nf_flow_nat_port_tcp(skb, thoff, port, new_port); break; case IPPROTO_UDP: - if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) - return NF_DROP; + nf_flow_nat_port_udp(skb, thoff, port, new_port); break; } - - return 0; } -int nf_flow_snat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir) +void nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port; @@ -456,13 +447,13 @@ int nf_flow_snat_port(const struct flow_offload *flow, break; } - return nf_flow_nat_port(skb, thoff, protocol, port, new_port); + nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_snat_port); 
-int nf_flow_dnat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir) +void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb, + unsigned int thoff, u8 protocol, + enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port; @@ -482,7 +473,7 @@ int nf_flow_dnat_port(const struct flow_offload *flow, break; } - return nf_flow_nat_port(skb, thoff, protocol, port, new_port); + nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_dnat_port); diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 0579e15c49688..714dc083f0933 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -34,19 +34,17 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto, return 0; } -static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, - __be32 addr, __be32 new_addr) +static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff, + __be32 addr, __be32 new_addr) { struct tcphdr *tcph; tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true); - - return 0; } -static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, - __be32 addr, __be32 new_addr) +static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, + __be32 addr, __be32 new_addr) { struct udphdr *udph; @@ -57,31 +55,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff, if (!udph->check) udph->check = CSUM_MANGLED_0; } - - return 0; } -static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, - unsigned int thoff, __be32 addr, - __be32 new_addr) +static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph, + unsigned int thoff, __be32 addr, + __be32 new_addr) { switch (iph->protocol) { case IPPROTO_TCP: - if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0) - return NF_DROP; + nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr); break; case IPPROTO_UDP: - if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0) - return NF_DROP; + nf_flow_nat_ip_udp(skb, thoff, addr, new_addr); break; } - - return 0; } -static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, - struct iphdr *iph, unsigned int thoff, - enum flow_offload_tuple_dir dir) +static void nf_flow_snat_ip(const struct flow_offload *flow, + struct sk_buff *skb, struct iphdr *iph, + unsigned int thoff, enum flow_offload_tuple_dir dir) { __be32 addr, new_addr; @@ -99,12 +91,12 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb, } csum_replace4(&iph->check, addr, new_addr); - return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); + nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); } -static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, - struct iphdr *iph, unsigned int thoff, - enum flow_offload_tuple_dir dir) +static void nf_flow_dnat_ip(const struct flow_offload *flow, + struct sk_buff *skb, struct iphdr *iph, + unsigned int thoff, enum flow_offload_tuple_dir dir) { __be32 addr, new_addr; @@ -122,24 +114,21 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb, } csum_replace4(&iph->check, addr, new_addr); - return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); + nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr); } -static int nf_flow_nat_ip(const struct flow_offload *flow, struct 
sk_buff *skb, +static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, enum flow_offload_tuple_dir dir, struct iphdr *iph) { - if (test_bit(NF_FLOW_SNAT, &flow->flags) && - (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || - nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) - return -1; - - if (test_bit(NF_FLOW_DNAT, &flow->flags) && - (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || - nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) - return -1; - - return 0; + if (test_bit(NF_FLOW_SNAT, &flow->flags)) { + nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir); + nf_flow_snat_ip(flow, skb, iph, thoff, dir); + } + if (test_bit(NF_FLOW_DNAT, &flow->flags)) { + nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir); + nf_flow_dnat_ip(flow, skb, iph, thoff, dir); + } } static bool ip_has_options(unsigned int thoff) @@ -276,8 +265,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, return NF_DROP; iph = ip_hdr(skb); - if (nf_flow_nat_ip(flow, skb, thoff, dir, iph) < 0) - return NF_DROP; + nf_flow_nat_ip(flow, skb, thoff, dir, iph); ip_decrease_ttl(iph); skb->tstamp = 0; @@ -301,22 +289,21 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook); -static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, - struct in6_addr *addr, - struct in6_addr *new_addr) +static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr, + struct ipv6hdr *ip6h) { struct tcphdr *tcph; tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32, new_addr->s6_addr32, true); - - return 0; } -static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, - struct in6_addr *addr, - struct in6_addr *new_addr) +static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, + struct in6_addr *addr, + struct in6_addr *new_addr) { struct udphdr *udph; @@ -327,32 +314,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff, if (!udph->check) udph->check = CSUM_MANGLED_0; } - - return 0; } -static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, - unsigned int thoff, struct in6_addr *addr, - struct in6_addr *new_addr) +static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, struct in6_addr *addr, + struct in6_addr *new_addr) { switch (ip6h->nexthdr) { case IPPROTO_TCP: - if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0) - return NF_DROP; + nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h); break; case IPPROTO_UDP: - if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0) - return NF_DROP; + nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr); break; } - - return 0; } -static int nf_flow_snat_ipv6(const struct flow_offload *flow, - struct sk_buff *skb, struct ipv6hdr *ip6h, - unsigned int thoff, - enum flow_offload_tuple_dir dir) +static void nf_flow_snat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) { struct in6_addr addr, new_addr; @@ -369,13 +350,13 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow, break; } - return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); + nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); } -static int nf_flow_dnat_ipv6(const struct flow_offload *flow, - struct sk_buff *skb, struct 
ipv6hdr *ip6h, - unsigned int thoff, - enum flow_offload_tuple_dir dir) +static void nf_flow_dnat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int thoff, + enum flow_offload_tuple_dir dir) { struct in6_addr addr, new_addr; @@ -392,27 +373,24 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow, break; } - return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); + nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr); } -static int nf_flow_nat_ipv6(const struct flow_offload *flow, - struct sk_buff *skb, - enum flow_offload_tuple_dir dir, - struct ipv6hdr *ip6h) +static void nf_flow_nat_ipv6(const struct flow_offload *flow, + struct sk_buff *skb, + enum flow_offload_tuple_dir dir, + struct ipv6hdr *ip6h) { unsigned int thoff = sizeof(*ip6h); - if (test_bit(NF_FLOW_SNAT, &flow->flags) && - (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || - nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) - return -1; - - if (test_bit(NF_FLOW_DNAT, &flow->flags) && - (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || - nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) - return -1; - - return 0; + if (test_bit(NF_FLOW_SNAT, &flow->flags)) { + nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir); + nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir); + } + if (test_bit(NF_FLOW_DNAT, &flow->flags)) { + nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir); + nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir); + } } static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, @@ -507,8 +485,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, return NF_DROP; ip6h = ipv6_hdr(skb); - if (nf_flow_nat_ipv6(flow, skb, dir, ip6h) < 0) - return NF_DROP; + nf_flow_nat_ipv6(flow, skb, dir, ip6h); ip6h->hop_limit--; skb->tstamp = 0; From e5075c0badaaac245a6fa0b4625b5cd714d8ade3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Mar 2021 15:55:28 +0100 Subject: [PATCH 08/10] netfilter: flowtable: call dst_check() to fall back to classic forwarding In case the route is stale, pass up the packet to the classic forwarding path for re-evaluation and schedule this flow entry for removal. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 714dc083f0933..3a8423899defc 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -197,14 +197,6 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) return true; } -static int nf_flow_offload_dst_check(struct dst_entry *dst) -{ - if (unlikely(dst_xfrm(dst))) - return dst_check(dst, 0) ? 
0 : -1; - - return 0; -} - static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb, const struct nf_hook_state *state, struct dst_entry *dst) @@ -256,7 +248,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, flow_offload_refresh(flow_table, flow); - if (nf_flow_offload_dst_check(&rt->dst)) { + if (!dst_check(&rt->dst, 0)) { flow_offload_teardown(flow); return NF_ACCEPT; } @@ -476,7 +468,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, flow_offload_refresh(flow_table, flow); - if (nf_flow_offload_dst_check(&rt->dst)) { + if (!dst_check(&rt->dst, 0)) { flow_offload_teardown(flow); return NF_ACCEPT; } From 1b9cd7690a1ef68c8f3756cae1ab88bf86660f0b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 17 Mar 2021 15:56:40 +0100 Subject: [PATCH 09/10] netfilter: flowtable: refresh timeout after dst and writable checks Refresh the timeout (and retry hardware offload) once the skbuff dst is confirmed to be current and after the skbuff is made writable. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 3a8423899defc..3be58b6d60af1 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -246,8 +246,6 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (nf_flow_state_check(flow, iph->protocol, skb, thoff)) return NF_ACCEPT; - flow_offload_refresh(flow_table, flow); - if (!dst_check(&rt->dst, 0)) { flow_offload_teardown(flow); return NF_ACCEPT; @@ -256,6 +254,8 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, thoff + hdrsize)) return NF_DROP; + flow_offload_refresh(flow_table, flow); + iph = ip_hdr(skb); nf_flow_nat_ip(flow, skb, thoff, dir, iph); @@ -466,8 +466,6 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, sizeof(*ip6h))) return NF_ACCEPT; - flow_offload_refresh(flow_table, flow); - if (!dst_check(&rt->dst, 0)) { flow_offload_teardown(flow); return NF_ACCEPT; @@ -476,6 +474,8 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, if (skb_try_make_writable(skb, sizeof(*ip6h) + hdrsize)) return NF_DROP; + flow_offload_refresh(flow_table, flow); + ip6h = ipv6_hdr(skb); nf_flow_nat_ipv6(flow, skb, dir, ip6h); From 0ce7cf4127f14078ca598ba9700d813178a59409 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 18 Mar 2021 01:25:05 +0100 Subject: [PATCH 10/10] netfilter: nftables: update table flags from the commit phase Do not update table flags from the preparation phase. Store the flags update into the transaction, then update the flags from the commit phase. 
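For illustration, the flags update is now split across the two phases (a condensed sketch of the hunks below; all identifiers come from the diff):

	/* preparation phase: only record the requested update in the transaction */
	nft_trans_table_flags(trans) = flags;
	nft_trans_table_update(trans) = true;

	/* commit phase: apply the recorded state and flags to the table */
	if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT)
		nf_tables_table_disable(net, trans->ctx.table);
	trans->ctx.table->flags = nft_trans_table_flags(trans);
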
Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 9 ++++++--- net/netfilter/nf_tables_api.c | 31 ++++++++++++++++--------------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index fdec57d862b73..67bc36f7f4fb7 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1498,13 +1498,16 @@ struct nft_trans_chain { struct nft_trans_table { bool update; - bool enable; + u8 state; + u32 flags; }; #define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) -#define nft_trans_table_enable(trans) \ - (((struct nft_trans_table *)trans->data)->enable) +#define nft_trans_table_state(trans) \ + (((struct nft_trans_table *)trans->data)->state) +#define nft_trans_table_flags(trans) \ + (((struct nft_trans_table *)trans->data)->flags) struct nft_trans_elem { struct nft_set *set; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 083c112bee0b8..bd5e8122ea5e7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -900,6 +900,12 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table) nft_table_disable(net, table, 0); } +enum { + NFT_TABLE_STATE_UNCHANGED = 0, + NFT_TABLE_STATE_DORMANT, + NFT_TABLE_STATE_WAKEUP +}; + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@ -929,19 +935,17 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_enable(trans) = false; + nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret >= 0) - nft_trans_table_enable(trans) = true; - else - ctx->table->flags |= NFT_TABLE_F_DORMANT; + nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; } if (ret < 0) goto err; + nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@ -8068,11 +8072,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + nf_tables_table_disable(net, trans->ctx.table); + + trans->ctx.table->flags = nft_trans_table_flags(trans); } else { nft_clear(net, trans->ctx.table); } @@ -8283,11 +8286,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + nf_tables_table_disable(net, trans->ctx.table); + nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list);