From d5608a0578a07766a97c00904d85f6a7221ddb37 Mon Sep 17 00:00:00 2001 From: Michael Zhou Date: Fri, 7 Aug 2020 01:15:25 +1000 Subject: [PATCH 01/13] netfilter: ip6t_NPT: rewrite addresses in ICMPv6 original packet Detect and rewrite a prefix embedded in an ICMPv6 original packet that was rewritten by a corresponding DNPT/SNPT rule so it will be recognised by the host that sent the original packet. Example Rules in effect on the 1:2:3:4::/64 + 5:6:7:8::/64 side router: * SNPT src-pfx 1:2:3:4::/64 dst-pfx 5:6:7:8::/64 * DNPT src-pfx 5:6:7:8::/64 dst-pfx 1:2:3:4::/64 No rules on the 9:a:b:c::/64 side. 1. 1:2:3:4::1 sends UDP packet to 9:a:b:c::1 2. Router applies SNPT changing src to 5:6:7:8::ffef::1 3. 9:a:b:c::1 receives packet with (src 5:6:7:8::ffef::1 dst 9:a:b:c::1) and replies with ICMPv6 port unreachable to 5:6:7:8::ffef::1, including original packet (src 5:6:7:8::ffef::1 dst 9:a:b:c::1) 4. Router forwards ICMPv6 packet with (src 9:a:b:c::1 dst 5:6:7:8::ffef::1) including original packet (src 5:6:7:8::ffef::1 dst 9:a:b:c::1) and applies DNPT changing dst to 1:2:3:4::1 5. 1:2:3:4::1 receives ICMPv6 packet with (src 9:a:b:c::1 dst 1:2:3:4::1) including original packet (src 5:6:7:8::ffef::1 dst 9:a:b:c::1). It doesn't recognise the original packet as the src doesn't match anything it originally sent With this change, at step 4, DNPT will also rewrite the original packet src to 1:2:3:4::1, so at step 5, 1:2:3:4::1 will recognise the ICMPv6 error and provide feedback to the application properly. Conversely, SNPT will help when ICMPv6 errors are sent from the translated network. 1. 9:a:b:c::1 sends UDP packet to 5:6:7:8::ffef::1 2. Router applies DNPT changing dst to 1:2:3:4::1 3. 1:2:3:4::1 receives packet with (src 9:a:b:c::1 dst 1:2:3:4::1) and replies with ICMPv6 port unreachable to 9:a:b:c::1 including original packet (src 9:a:b:c::1 dst 1:2:3:4::1) 4. Router forwards ICMPv6 packet with (src 1:2:3:4::1 dst 9:a:b:c::1) including original packet (src 9:a:b:c::1 dst 1:2:3:4::1) and applies SNPT changing src to 5:6:7:8::ffef::1 5. 9:a:b:c::1 receives ICMPv6 packet with (src 5:6:7:8::ffef::1 dst 9:a:b:c::1) including original packet (src 9:a:b:c::1 dst 1:2:3:4::1). It doesn't recognise the original packet as the dst doesn't match anything it already sent The change to SNPT means the ICMPv6 original packet dst will be rewritten to 5:6:7:8::ffef::1 in step 4, allowing the error to be properly recognised in step 5. Signed-off-by: Michael Zhou Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_NPT.c | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index 9ee077bf4f493..787c74aa85e33 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -77,16 +77,43 @@ static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, return true; } +static struct ipv6hdr *icmpv6_bounced_ipv6hdr(struct sk_buff *skb, + struct ipv6hdr *_bounced_hdr) +{ + if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) + return NULL; + + if (!icmpv6_is_err(icmp6_hdr(skb)->icmp6_type)) + return NULL; + + return skb_header_pointer(skb, + skb_transport_offset(skb) + sizeof(struct icmp6hdr), + sizeof(struct ipv6hdr), + _bounced_hdr); +} + static unsigned int ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; + struct ipv6hdr _bounced_hdr; + struct ipv6hdr *bounced_hdr; + struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, saddr)); return NF_DROP; } + + /* rewrite dst addr of bounced packet which was sent to dst range */ + bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); + if (bounced_hdr) { + ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->daddr, npt->src_pfx_len); + if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) + ip6t_npt_map_pfx(npt, &bounced_hdr->daddr); + } + return XT_CONTINUE; } @@ -94,12 +121,24 @@ static unsigned int ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_npt_tginfo *npt = par->targinfo; + struct ipv6hdr _bounced_hdr; + struct ipv6hdr *bounced_hdr; + struct in6_addr bounced_pfx; if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, daddr)); return NF_DROP; } + + /* rewrite src addr of bounced packet which was sent from dst range */ + bounced_hdr = icmpv6_bounced_ipv6hdr(skb, &_bounced_hdr); + if (bounced_hdr) { + ipv6_addr_prefix(&bounced_pfx, &bounced_hdr->saddr, npt->src_pfx_len); + if (ipv6_addr_cmp(&bounced_pfx, &npt->src_pfx.in6) == 0) + ip6t_npt_map_pfx(npt, &bounced_hdr->saddr); + } + return XT_CONTINUE; } From c5a8a8498eed1c164afc94f50a939c1a10abf8ad Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Tue, 11 Aug 2020 03:46:40 -0400 Subject: [PATCH 02/13] ipvs: Fix uninit-value in do_ip_vs_set_ctl() do_ip_vs_set_ctl() is referencing uninitialized stack value when `len` is zero. Fix it. Reported-by: syzbot+23b5f9e7caf61d9a3898@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=46ebfb92a8a812621a001ef04d90dfa459520fe2 Suggested-by: Julian Anastasov Signed-off-by: Peilin Ye Acked-by: Julian Anastasov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 678c5b14841c1..8dbfd84322a88 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2508,6 +2508,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) /* Set timeout values for (tcp tcpfin udp) */ ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); goto out_unlock; + } else if (!len) { + /* No more commands with len == 0 below */ + ret = -EINVAL; + goto out_unlock; } usvc_compat = (struct ip_vs_service_user *)arg; @@ -2584,9 +2588,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) break; case IP_VS_SO_SET_DELDEST: ret = ip_vs_del_dest(svc, &udest); - break; - default: - ret = -EINVAL; } out_unlock: From 7a81575b806e5dab214025e6757362c62d946405 Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Thu, 20 Aug 2020 10:19:01 +0200 Subject: [PATCH 03/13] netfilter: nf_tables: add userdata attributes to nft_table Enables storing userdata for nft_table. Field udata points to user data and udlen store its length. Adds new attribute flag NFTA_TABLE_USERDATA Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 22 +++++++++++++++++++++- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index bf9491b77d168..97a7e147a59a9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1080,6 +1080,8 @@ struct nft_table { flags:8, genmask:2; char *name; + u16 udlen; + u8 *udata; }; void nft_register_chain_type(const struct nft_chain_type *); diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 42f351c1f5c53..aeb88cbd303ea 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -172,6 +172,7 @@ enum nft_table_flags { * @NFTA_TABLE_NAME: name of the table (NLA_STRING) * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32) * @NFTA_TABLE_USE: number of chains in this table (NLA_U32) + * @NFTA_TABLE_USERDATA: user data (NLA_BINARY) */ enum nft_table_attributes { NFTA_TABLE_UNSPEC, @@ -180,6 +181,7 @@ enum nft_table_attributes { NFTA_TABLE_USE, NFTA_TABLE_HANDLE, NFTA_TABLE_PAD, + NFTA_TABLE_USERDATA, __NFTA_TABLE_MAX }; #define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index fd814e514f942..6ccce2a2e715d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -650,6 +650,8 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, + [NFTA_TABLE_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN } }; static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, @@ -676,6 +678,11 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, NFTA_TABLE_PAD)) goto nla_put_failure; + if (table->udata) { + if (nla_put(skb, NFTA_TABLE_USERDATA, table->udlen, table->udata)) + goto nla_put_failure; + } + nlmsg_end(skb, nlh); return 0; @@ -977,8 +984,9 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, int family = nfmsg->nfgen_family; const struct nlattr *attr; struct nft_table *table; - u32 flags = 0; struct nft_ctx ctx; + u32 flags = 0; + u16 udlen = 0; int err; lockdep_assert_held(&net->nft.commit_mutex); @@ -1014,6 +1022,16 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (table->name == NULL) goto err_strdup; + if (nla[NFTA_TABLE_USERDATA]) { + udlen = nla_len(nla[NFTA_TABLE_USERDATA]); + table->udata = kzalloc(udlen, GFP_KERNEL); + if (table->udata == NULL) + goto err_table_udata; + + nla_memcpy(table->udata, nla[NFTA_TABLE_USERDATA], udlen); + table->udlen = udlen; + } + err = rhltable_init(&table->chains_ht, &nft_chain_ht_params); if (err) goto err_chain_ht; @@ -1036,6 +1054,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, err_trans: rhltable_destroy(&table->chains_ht); err_chain_ht: + kfree(table->udata); +err_table_udata: kfree(table->name); err_strdup: kfree(table); From b1328e54ac5e5998b9f7d5735029f7e64e61b488 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:42 +0200 Subject: [PATCH 04/13] netfilter: conntrack: do not increment two error counters at same time The /proc interface for nf_conntrack displays the "error" counter as "icmp_error". It makes sense to not increment "invalid" when failing to handle an icmp packet since those are special. For example, its possible for conntrack to see partial and/or fragmented packets inside icmp errors. This should be a separate event and not get mixed with the "invalid" counter. Likewise, remove the "error" increment for errors from get_l4proto(). After this, the error counter will only increment for errors coming from icmp(v6) packet handling. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5b97d233f89ba..3cfbafdff941a 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1725,10 +1725,8 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl, else return NF_ACCEPT; - if (ret <= 0) { + if (ret <= 0) NF_CT_STAT_INC_ATOMIC(state->net, error); - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - } return ret; } @@ -1813,7 +1811,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum); if (dataoff <= 0) { pr_debug("not prepared to track yet or error occurred\n"); - NF_CT_STAT_INC_ATOMIC(state->net, error); NF_CT_STAT_INC_ATOMIC(state->net, invalid); ret = NF_ACCEPT; goto out; From 4afc41dfa5a716e9e7a90c22972583f337c0bcbf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:43 +0200 Subject: [PATCH 05/13] netfilter: conntrack: remove ignore stats This counter increments when nf_conntrack_in sees a packet that already has a conntrack attached or when the packet is marked as UNTRACKED. Neither is an error. The former is normal for loopback traffic. The second happens for certain ICMPv6 packets or when nftables/ip(6)tables rules are in place. In case someone needs to count UNTRACKED packets, or packets that are marked as untracked before conntrack_in this can be done with both nftables and ip(6)tables rules. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 1 - include/uapi/linux/netfilter/nfnetlink_conntrack.h | 2 +- net/netfilter/nf_conntrack_core.c | 4 +--- net/netfilter/nf_conntrack_netlink.c | 1 - net/netfilter/nf_conntrack_standalone.c | 2 +- 5 files changed, 3 insertions(+), 7 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 1db83c931d9c0..96b90d7e361f3 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -8,7 +8,6 @@ struct ip_conntrack_stat { unsigned int found; unsigned int invalid; - unsigned int ignore; unsigned int insert; unsigned int insert_failed; unsigned int drop; diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 262881792671b..3e471558da82e 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -247,7 +247,7 @@ enum ctattr_stats_cpu { CTA_STATS_FOUND, CTA_STATS_NEW, /* no longer used */ CTA_STATS_INVALID, - CTA_STATS_IGNORE, + CTA_STATS_IGNORE, /* no longer used */ CTA_STATS_DELETE, /* no longer used */ CTA_STATS_DELETE_LIST, /* no longer used */ CTA_STATS_INSERT, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 3cfbafdff941a..a111bcf1b93c7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1800,10 +1800,8 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) if (tmpl || ctinfo == IP_CT_UNTRACKED) { /* Previously seen (loopback or untracked)? Ignore. */ if ((tmpl && !nf_ct_is_template(tmpl)) || - ctinfo == IP_CT_UNTRACKED) { - NF_CT_STAT_INC_ATOMIC(state->net, ignore); + ctinfo == IP_CT_UNTRACKED) return NF_ACCEPT; - } skb->_nfct = 0; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 832eabecfbddc..c64f23a8f373d 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2509,7 +2509,6 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || - nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index a604f43e3e6bd..b673a03624d23 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -439,7 +439,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) st->found, 0, st->invalid, - st->ignore, + 0, 0, 0, st->insert, From bc92470413f3af152db0d8f90ef3eb13f8cc417a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:44 +0200 Subject: [PATCH 06/13] netfilter: conntrack: add clash resolution stat counter There is a misconception about what "insert_failed" means. We increment this even when a clash got resolved, so it might not indicate a problem. Add a dedicated counter for clash resolution and only increment insert_failed if a clash cannot be resolved. For the old /proc interface, export this in place of an older stat that got removed a while back. For ctnetlink, export this with a new attribute. Also correct an outdated comment that implies we add a duplicate tuple -- we only add the (unique) reply direction. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 1 + include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_core.c | 9 +++++---- net/netfilter/nf_conntrack_netlink.c | 4 +++- net/netfilter/nf_conntrack_standalone.c | 2 +- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 96b90d7e361f3..0c7d8d1e945dd 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -10,6 +10,7 @@ struct ip_conntrack_stat { unsigned int invalid; unsigned int insert; unsigned int insert_failed; + unsigned int clash_resolve; unsigned int drop; unsigned int early_drop; unsigned int error; diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 3e471558da82e..d8484be72fdc3 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -256,6 +256,7 @@ enum ctattr_stats_cpu { CTA_STATS_EARLY_DROP, CTA_STATS_ERROR, CTA_STATS_SEARCH_RESTART, + CTA_STATS_CLASH_RESOLVE, __CTA_STATS_MAX, }; #define CTA_STATS_MAX (__CTA_STATS_MAX - 1) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index a111bcf1b93c7..93e77ca0efad1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -859,7 +859,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) out: nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert_failed); local_bh_enable(); return -EEXIST; } @@ -934,7 +933,7 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, nf_conntrack_put(&loser_ct->ct_general); nf_ct_set(skb, ct, ctinfo); - NF_CT_STAT_INC(net, insert_failed); + NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; } @@ -998,6 +997,8 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &nf_conntrack_hash[repl_idx]); + + NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; } @@ -1027,10 +1028,10 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) * * Failing that, the new, unconfirmed conntrack is still added to the table * provided that the collision only occurs in the ORIGINAL direction. - * The new entry will be added after the existing one in the hash list, + * The new entry will be added only in the non-clashing REPLY direction, * so packets in the ORIGINAL direction will continue to match the existing * entry. The new entry will also have a fixed timeout so it expires -- - * due to the collision, it will not see bidirectional traffic. + * due to the collision, it will only see reply traffic. * * Returns NF_DROP if the clash could not be resolved. */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c64f23a8f373d..89d99f6dfd0a2 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2516,7 +2516,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) || nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) || nla_put_be32(skb, CTA_STATS_SEARCH_RESTART, - htonl(st->search_restart))) + htonl(st->search_restart)) || + nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE, + htonl(st->clash_resolve))) goto nla_put_failure; nlmsg_end(skb, nlh); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index b673a03624d23..0ff39740797db 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -435,7 +435,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, - 0, + st->clash_resolve, /* was: searched */ st->found, 0, st->invalid, From ff73e7479b8eea594a985ca29f4b45d604dbcb2c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Aug 2020 00:52:45 +0200 Subject: [PATCH 07/13] netfilter: conntrack: remove unneeded nf_ct_put We can delay refcount increment until we reassign the existing entry to the current skb. A 0 refcount can't happen while the nf_conn object is still in the hash table and parallel mutations are impossible because we hold the bucket lock. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 93e77ca0efad1..234b7cab37c30 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -908,6 +908,7 @@ static void __nf_conntrack_insert_prepare(struct nf_conn *ct) tstamp->start = ktime_get_real_ns(); } +/* caller must hold locks to prevent concurrent changes */ static int __nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) { @@ -921,13 +922,12 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, if (nf_ct_is_dying(ct)) return NF_DROP; - if (!atomic_inc_not_zero(&ct->ct_general.use)) - return NF_DROP; - if (((ct->status & IPS_NAT_DONE_MASK) == 0) || nf_ct_match(ct, loser_ct)) { struct net *net = nf_ct_net(ct); + nf_conntrack_get(&ct->ct_general); + nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_ct_add_to_dying_list(loser_ct); nf_conntrack_put(&loser_ct->ct_general); @@ -937,7 +937,6 @@ static int __nf_ct_resolve_clash(struct sk_buff *skb, return NF_ACCEPT; } - nf_ct_put(ct); return NF_DROP; } From f5143e10a2e41f70ed6b1f3bbd8119ede5240c80 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 27 Aug 2020 22:08:13 +0800 Subject: [PATCH 08/13] netfilter: xt_HMARK: Use ip_is_fragment() helper Use ip_is_fragment() to simpify code. Signed-off-by: YueHaibing Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_HMARK.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 713fb38541dfc..8928ec56c3888 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -276,7 +276,7 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, return 0; /* follow-up fragments don't contain ports, skip all fragments */ - if (ip->frag_off & htons(IP_MF | IP_OFFSET)) + if (ip_is_fragment(ip)) return 0; hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); From 67407a406db337acdaabecd3747d160d89a929e4 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sat, 29 Aug 2020 08:19:15 +0200 Subject: [PATCH 09/13] netfilter: nft_socket: add wildcard support Add NFT_SOCKET_WILDCARD to match to wildcard socket listener. Signed-off-by: Balazs Scheidler Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nft_socket.c | 27 ++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aeb88cbd303ea..543dc697b7960 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1010,10 +1010,12 @@ enum nft_socket_attributes { * * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option * @NFT_SOCKET_MARK: Value of the socket mark + * @NFT_SOCKET_WILDCARD: Whether the socket is zero-bound (e.g. 0.0.0.0 or ::0) */ enum nft_socket_keys { NFT_SOCKET_TRANSPARENT, NFT_SOCKET_MARK, + NFT_SOCKET_WILDCARD, __NFT_SOCKET_MAX }; #define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1) diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 637ce3e8c575c..a28aca5124cef 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -14,6 +14,25 @@ struct nft_socket { }; }; +static void nft_socket_wildcard(const struct nft_pktinfo *pkt, + struct nft_regs *regs, struct sock *sk, + u32 *dest) +{ + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + nft_reg_store8(dest, inet_sk(sk)->inet_rcv_saddr == 0); + break; +#if IS_ENABLED(CONFIG_NF_TABLES_IPV6) + case NFPROTO_IPV6: + nft_reg_store8(dest, ipv6_addr_any(&sk->sk_v6_rcv_saddr)); + break; +#endif + default: + regs->verdict.code = NFT_BREAK; + return; + } +} + static void nft_socket_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -59,6 +78,13 @@ static void nft_socket_eval(const struct nft_expr *expr, return; } break; + case NFT_SOCKET_WILDCARD: + if (!sk_fullsock(sk)) { + regs->verdict.code = NFT_BREAK; + return; + } + nft_socket_wildcard(pkt, regs, sk, dest); + break; default: WARN_ON(1); regs->verdict.code = NFT_BREAK; @@ -97,6 +123,7 @@ static int nft_socket_init(const struct nft_ctx *ctx, priv->key = ntohl(nla_get_u32(tb[NFTA_SOCKET_KEY])); switch(priv->key) { case NFT_SOCKET_TRANSPARENT: + case NFT_SOCKET_WILDCARD: len = sizeof(u8); break; case NFT_SOCKET_MARK: From 144b0a0e608690d46e9a77819249bdd8d23bdcb6 Mon Sep 17 00:00:00 2001 From: Yaroslav Bolyukin Date: Sat, 29 Aug 2020 18:59:53 +0500 Subject: [PATCH 10/13] ipvs: remove dependency on ip6_tables This dependency was added because ipv6_find_hdr was in iptables specific code but is no longer required Fixes: f8f626754ebe ("ipv6: Move ipv6_find_hdr() out of Netfilter code.") Fixes: 63dca2c0b0e7 ("ipvs: Fix faulty IPv6 extension header handling in IPVS") Signed-off-by: Yaroslav Bolyukin Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 3 --- net/netfilter/ipvs/Kconfig | 1 - 2 files changed, 4 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9a59a33787cb8..d609e957a3ec0 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,9 +25,6 @@ #include #include /* for struct ipv6hdr */ #include -#if IS_ENABLED(CONFIG_IP_VS_IPV6) -#include -#endif #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include #endif diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 2c1593089eded..eb0e329f9b8d6 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -29,7 +29,6 @@ if IP_VS config IP_VS_IPV6 bool "IPv6 support for IPVS" depends on IPV6 = y || IP_VS = IPV6 - select IP6_NF_IPTABLES select NF_DEFRAG_IPV6 help Add IPv6 support to IPVS. From 36c3be8a2c8ae61b2c5ba451b48937c3c494965d Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Fri, 4 Sep 2020 20:56:53 +0800 Subject: [PATCH 11/13] netfilter: ebt_stp: Remove unused macro BPDU_TYPE_TCN BPDU_TYPE_TCN is never used after it was introduced. So better to remove it. Reported-by: Hulk Robot Signed-off-by: Wang Hai Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_stp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 0d6d20c9105e7..8f68afda5f81e 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -15,7 +15,6 @@ #include #define BPDU_TYPE_CONFIG 0 -#define BPDU_TYPE_TCN 0x80 struct stp_header { u8 dsap; From 0c5edd77a21dc863aa7639ea07c9d7a5b15f62f9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 7 Sep 2020 18:14:28 +0200 Subject: [PATCH 12/13] selftests/net: replace obsolete NFT_CHAIN configuration Replace old parameters with global NFT_NAT from commit db8ab38880e0 ("netfilter: nf_tables: merge ipv4 and ipv6 nat chain types") Signed-off-by: Fabian Frederick Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/net/config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3b42c06b59858..5a57ea02802df 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -24,8 +24,7 @@ CONFIG_IP_NF_NAT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NFT_NAT=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y From b131c96496b369c7b14125e7c50e89ac7cec8051 Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Tue, 8 Sep 2020 13:01:41 +0200 Subject: [PATCH 13/13] netfilter: nf_tables: add userdata support for nft_object Enables storing userdata for nft_object. Initially this will store an optional comment but can be extended in the future as needed. Adds new attribute NFTA_OBJ_USERDATA to nft_object. Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 35 ++++++++++++++++++------ 3 files changed, 31 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 97a7e147a59a9..99c1b3188b1e8 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1123,6 +1123,8 @@ struct nft_object { u32 genmask:2, use:30; u64 handle; + u16 udlen; + u8 *udata; /* runtime data below here */ const struct nft_object_ops *ops ____cacheline_aligned; unsigned char data[] diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 543dc697b7960..2a6e09dea1a04 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1559,6 +1559,7 @@ enum nft_ct_expectation_attributes { * @NFTA_OBJ_DATA: stateful object data (NLA_NESTED) * @NFTA_OBJ_USE: number of references to this expression (NLA_U32) * @NFTA_OBJ_HANDLE: object handle (NLA_U64) + * @NFTA_OBJ_USERDATA: user data (NLA_BINARY) */ enum nft_object_attributes { NFTA_OBJ_UNSPEC, @@ -1569,6 +1570,7 @@ enum nft_object_attributes { NFTA_OBJ_USE, NFTA_OBJ_HANDLE, NFTA_OBJ_PAD, + NFTA_OBJ_USERDATA, __NFTA_OBJ_MAX }; #define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6ccce2a2e715d..e9b4848e9dd01 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5755,6 +5755,8 @@ static const struct nla_policy nft_obj_policy[NFTA_OBJ_MAX + 1] = { [NFTA_OBJ_TYPE] = { .type = NLA_U32 }, [NFTA_OBJ_DATA] = { .type = NLA_NESTED }, [NFTA_OBJ_HANDLE] = { .type = NLA_U64}, + [NFTA_OBJ_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, @@ -5902,6 +5904,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, struct nft_object *obj; struct nft_ctx ctx; u32 objtype; + u16 udlen; int err; if (!nla[NFTA_OBJ_TYPE] || @@ -5946,7 +5949,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]); if (IS_ERR(obj)) { err = PTR_ERR(obj); - goto err1; + goto err_init; } obj->key.table = table; obj->handle = nf_tables_alloc_handle(table); @@ -5954,32 +5957,44 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); if (!obj->key.name) { err = -ENOMEM; - goto err2; + goto err_strdup; + } + + if (nla[NFTA_OBJ_USERDATA]) { + udlen = nla_len(nla[NFTA_OBJ_USERDATA]); + obj->udata = kzalloc(udlen, GFP_KERNEL); + if (obj->udata == NULL) + goto err_userdata; + + nla_memcpy(obj->udata, nla[NFTA_OBJ_USERDATA], udlen); + obj->udlen = udlen; } err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); if (err < 0) - goto err3; + goto err_trans; err = rhltable_insert(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); if (err < 0) - goto err4; + goto err_obj_ht; list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; -err4: +err_obj_ht: /* queued in transaction log */ INIT_LIST_HEAD(&obj->list); return err; -err3: +err_trans: kfree(obj->key.name); -err2: +err_userdata: + kfree(obj->udata); +err_strdup: if (obj->ops->destroy) obj->ops->destroy(&ctx, obj); kfree(obj); -err1: +err_init: module_put(type->owner); return err; } @@ -6011,6 +6026,10 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, NFTA_OBJ_PAD)) goto nla_put_failure; + if (obj->udata && + nla_put(skb, NFTA_OBJ_USERDATA, obj->udlen, obj->udata)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0;