From 56a97e701c49ff66dd1a5e5e02775209ab5147d3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 10 Jul 2017 15:06:39 +0200 Subject: [PATCH 01/47] netfilter: expect: add to hash table after expect init assuming we have lockless readers we should make sure they can only see expectations that have already been initialized. hlist_add_head_rcu acts as memory barrier, move it after timer setup. Theoretically we could crash due to a del_timer() on other cpu seeing garbage data. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_expect.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 899c2c36da136..2c63808bea96e 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) /* two references : one for hash insert, one for the timer */ refcount_add(2, &exp->use); - hlist_add_head_rcu(&exp->lnode, &master_help->expectations); - master_help->expecting[exp->class]++; - - hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); - net->ct.expect_count++; - setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); helper = rcu_dereference_protected(master_help->helper, @@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) } add_timer(&exp->timeout); + hlist_add_head_rcu(&exp->lnode, &master_help->expectations); + master_help->expecting[exp->class]++; + + hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); + net->ct.expect_count++; + NF_CT_STAT_INC(net, expect_create); } From 9f08ea848117ab521efcfd3e004d8e1a0edc640c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Jul 2017 20:18:09 +0200 Subject: [PATCH 02/47] netfilter: nf_tables: keep chain counters away from hot path These chain counters are only used by the iptables-compat tool, that allow users to use the x_tables extensions from the existing nf_tables framework. This patch makes nf_tables by ~5% for the general usecase, ie. native nft users, where no chain counters are used at all. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 2 ++ net/netfilter/nf_tables_api.c | 11 +++-------- net/netfilter/nf_tables_core.c | 26 ++++++++++++++++++-------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 8f690effec373..424684c337719 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -49,6 +49,8 @@ struct nft_payload_set { }; extern const struct nft_expr_ops nft_payload_fast_ops; + +extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7843efa33c598..7fbf0070aba1c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1240,6 +1240,8 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) module_put(basechain->type->owner); free_percpu(basechain->stats); + if (basechain->stats) + static_branch_dec(&nft_counters_enabled); if (basechain->ops[0].dev != NULL) dev_put(basechain->ops[0].dev); kfree(basechain); @@ -1504,14 +1506,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, return PTR_ERR(stats); } basechain->stats = stats; - } else { - stats = netdev_alloc_pcpu_stats(struct nft_stats); - if (stats == NULL) { - nft_chain_release_hook(&hook); - kfree(basechain); - return -ENOMEM; - } - rcu_assign_pointer(basechain->stats, stats); + static_branch_inc(&nft_counters_enabled); } hookfn = hook.type->hooks[hook.num]; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 65dbeadcb1188..c5bab08b0d734 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, return true; } +DEFINE_STATIC_KEY_FALSE(nft_counters_enabled); + +static noinline void nft_update_chain_stats(const struct nft_chain *chain, + const struct nft_pktinfo *pkt) +{ + struct nft_stats *stats; + + local_bh_disable(); + stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats)); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); + local_bh_enable(); +} + struct nft_jumpstack { const struct nft_chain *chain; const struct nft_rule *rule; @@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; @@ -220,13 +235,8 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) nft_trace_packet(&info, basechain, NULL, -1, NFT_TRACETYPE_POLICY); - rcu_read_lock_bh(); - stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); - u64_stats_update_begin(&stats->syncp); - stats->pkts++; - stats->bytes += pkt->skb->len; - u64_stats_update_end(&stats->syncp); - rcu_read_unlock_bh(); + if (static_branch_unlikely(&nft_counters_enabled)) + nft_update_chain_stats(basechain, pkt); return nft_base_chain(basechain)->policy; } From 0b35f6031a00329800bacc04085188c300c3a4d8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 19 Jul 2017 14:27:33 +0900 Subject: [PATCH 03/47] netfilter: Remove duplicated rcu_read_lock. This patch removes duplicate rcu_read_lock(). 1. IPVS part: According to Julian Anastasov's mention, contexts of ipvs are described at: http://marc.info/?l=netfilter-devel&m=149562884514072&w=2, in summary: - packet RX/TX: does not need locks because packets come from hooks. - sync msg RX: backup server uses RCU locks while registering new connections. - ip_vs_ctl.c: configuration get/set, RCU locks needed. - xt_ipvs.c: It is a netfilter match, running from hook context. As result, rcu_read_lock and rcu_read_unlock can be removed from: - ip_vs_core.c: all - ip_vs_ctl.c: - only from ip_vs_has_real_service - ip_vs_ftp.c: all - ip_vs_proto_sctp.c: all - ip_vs_proto_tcp.c: all - ip_vs_proto_udp.c: all - ip_vs_xmit.c: all (contains only packet processing) 2. Netfilter part: There are three types of functions that are guaranteed the rcu_read_lock(). First, as result, functions are only called by nf_hook(): - nf_conntrack_broadcast_help(), pptp_expectfn(), set_expected_rtp_rtcp(). - tcpmss_reverse_mtu(), tproxy_laddr4(), tproxy_laddr6(). - match_lookup_rt6(), check_hlist(), hashlimit_mt_common(). - xt_osf_match_packet(). Second, functions that caller already held the rcu_read_lock(). - destroy_conntrack(), ctnetlink_conntrack_event(). - ctnl_timeout_find_get(), nfqnl_nf_hook_drop(). Third, functions that are mixed with type1 and type2. These functions are called by nf_hook() also these are called by ordinary functions that already held the rcu_read_lock(): - __ctnetlink_glue_build(), ctnetlink_expect_event(). - ctnetlink_proto_size(). Applied files are below: - nf_conntrack_broadcast.c, nf_conntrack_core.c, nf_conntrack_netlink.c. - nf_conntrack_pptp.c, nf_conntrack_sip.c, nfnetlink_cttimeout.c. - nfnetlink_queue.c, xt_TCPMSS.c, xt_TPROXY.c, xt_addrtype.c. - xt_connlimit.c, xt_hashlimit.c, xt_osf.c Detailed calltrace can be found at: http://marc.info/?l=netfilter-devel&m=149667610710350&w=2 Signed-off-by: Taehee Yoo Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 8 ----- net/netfilter/ipvs/ip_vs_ctl.c | 3 -- net/netfilter/ipvs/ip_vs_ftp.c | 2 -- net/netfilter/ipvs/ip_vs_proto_sctp.c | 11 ++---- net/netfilter/ipvs/ip_vs_proto_tcp.c | 10 +----- net/netfilter/ipvs/ip_vs_proto_udp.c | 10 +----- net/netfilter/ipvs/ip_vs_xmit.c | 46 +++----------------------- net/netfilter/nf_conntrack_broadcast.c | 2 -- net/netfilter/nf_conntrack_core.c | 3 -- net/netfilter/nf_conntrack_netlink.c | 12 ------- net/netfilter/nf_conntrack_pptp.c | 2 -- net/netfilter/nf_conntrack_sip.c | 6 +--- net/netfilter/nfnetlink_cttimeout.c | 2 -- net/netfilter/nfnetlink_queue.c | 2 -- net/netfilter/xt_TCPMSS.c | 2 -- net/netfilter/xt_TPROXY.c | 4 --- net/netfilter/xt_addrtype.c | 3 -- net/netfilter/xt_connlimit.c | 3 -- net/netfilter/xt_hashlimit.c | 8 ++--- net/netfilter/xt_osf.c | 2 -- 20 files changed, 13 insertions(+), 128 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e31956b58abaf..2ff9d9070c955 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.inpkts++; s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); u64_stats_update_begin(&s->syncp); @@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.outpkts++; s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); u64_stats_update_begin(&s->syncp); @@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, if (!pptr) return NULL; - rcu_read_lock(); dest = ip_vs_find_real_service(ipvs, af, iph->protocol, &iph->saddr, pptr[0]); if (dest) { @@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, pptr[0], pptr[1]); } } - rcu_read_unlock(); return cp; } @@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (dest) { struct ip_vs_dest_dst *dest_dst; - rcu_read_lock(); dest_dst = rcu_dereference(dest->dest_dst); if (dest_dst) mtu = dst_mtu(dest_dst->dst_cache); - rcu_read_unlock(); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 1fa3c2307b6ea..4f940d7eb2f7e 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_rs_hashkey(af, daddr, dport); - rcu_read_lock(); hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { if (dest->port == dport && dest->af == af && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->protocol == protocol || dest->vfwmark)) { /* HIT */ - rcu_read_unlock(); return true; } } - rcu_read_unlock(); return false; } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index fb780be76d15a..3e17d32b629d1 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * hopefully it will succeed on the retransmitted * packet. */ - rcu_read_lock(); mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, iph->ihl * 4, start - data, end - start, buf, buf_len); - rcu_read_unlock(); if (mangled) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 3ffad4adaddf9..e1efa446b305e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -38,7 +38,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, return 0; } - rcu_read_lock(); if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->daddr, ports[1]); @@ -53,7 +52,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, * It seems that we are very loaded. * We have to drop this packet :( */ - rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -67,11 +65,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; - rcu_read_unlock(); return 0; } } - rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -526,12 +522,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - rcu_read_lock(); list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -544,11 +538,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); - goto out; + break; } } - rcu_read_unlock(); -out: + return result; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 12dc8d5bc37d7..121a321b91bea 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, } /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ - rcu_read_lock(); if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, @@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, * It seems that we are very loaded. * We have to drop this packet :( */ - rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; - rcu_read_unlock(); return 0; } } - rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - rcu_read_lock(); list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); - goto out; + break; } } - rcu_read_unlock(); - out: return result; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index e494e9a88c7fb..30e11cd6aa8a9 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, return 0; } - rcu_read_lock(); if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->daddr, ports[1]); @@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, * It seems that we are very loaded. * We have to drop this packet :( */ - rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; - rcu_read_unlock(); return 0; } } - rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - rcu_read_lock(); list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); - goto out; + break; } } - rcu_read_unlock(); - out: return result; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 2eab1e0400f48..90d396814798e 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; @@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) @@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; @@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); - rcu_read_unlock(); LeaveFunction(10); return rc; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; @@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); - rcu_read_unlock(); LeaveFunction(10); return rc; @@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: LeaveFunction(10); kfree_skb(skb); - rcu_read_unlock(); return NF_STOLEN; } #endif @@ -1035,7 +1023,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1043,10 +1030,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); - } rt = skb_rtable(skb); tdev = rt->dst.dev; @@ -1095,7 +1080,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_local_out(net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); @@ -1104,7 +1088,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: if (!IS_ERR(skb)) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1127,7 +1110,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, @@ -1136,10 +1118,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); - } rt = (struct rt6_info *) skb_dst(skb); tdev = rt->dst.dev; @@ -1185,7 +1165,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip6_local_out(cp->ipvs->net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); @@ -1194,7 +1173,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: if (!IS_ERR(skb)) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1213,17 +1191,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); - } ip_send_check(ip_hdr(skb)); @@ -1231,14 +1206,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1252,7 +1225,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, @@ -1261,23 +1233,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_KNOWN_NH); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); - } /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1322,7 +1290,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - rcu_read_lock(); local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, NULL, iph); if (local < 0) @@ -1368,12 +1335,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); - rcu_read_unlock(); goto out; tx_error: kfree_skb(skb); - rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); @@ -1414,7 +1379,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - rcu_read_lock(); local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) @@ -1460,12 +1424,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); - rcu_read_unlock(); goto out; tx_error: kfree_skb(skb); - rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 4e99cca61612f..ecc3ab7846339 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) goto out; - rcu_read_lock(); in_dev = __in_dev_get_rcu(rt->dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { @@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, } } endfor_ifa(in_dev); } - rcu_read_unlock(); if (mask == 0) goto out; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9979f46c81dce..69746928cc0a7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -407,13 +407,10 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_tmpl_free(ct); return; } - rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->destroy) l4proto->destroy(ct); - rcu_read_unlock(); - local_bh_disable(); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7999e70c3bfbe..4dba71de4de79 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -539,13 +539,11 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) struct nf_conntrack_l4proto *l4proto; size_t len = 0; - rcu_read_lock(); l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); len += l3proto->nla_size; l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); len += l4proto->nla_size; - rcu_read_unlock(); return len; } @@ -664,7 +662,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - rcu_read_lock(); zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); @@ -736,8 +733,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif - rcu_read_unlock(); - nlmsg_end(skb, nlh); err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); @@ -747,7 +742,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) return 0; nla_put_failure: - rcu_read_unlock(); nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); @@ -2213,7 +2207,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; - rcu_read_lock(); zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); @@ -2272,11 +2265,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) #endif if (ctnetlink_dump_labels(skb, ct) < 0) goto nla_put_failure; - rcu_read_unlock(); return 0; nla_put_failure: - rcu_read_unlock(); return -ENOSPC; } @@ -2661,17 +2652,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - rcu_read_lock(); if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; - rcu_read_unlock(); nlmsg_end(skb, nlh); nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: - rcu_read_unlock(); nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 6959e93063d4c..11562f2a08bb0 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct, /* Can you see how rusty this code is, compared with the pre-2.6.11 * one? That's what happened to my shiny newnat of 2002 ;( -HW */ - rcu_read_lock(); nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK) nf_nat_pptp_expectfn(ct, exp); @@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("not found\n"); } } - rcu_read_unlock(); } static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d38af4274335b..4dbb5bad4363b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, tuple.dst.u3 = *daddr; tuple.dst.u.udp.port = port; - rcu_read_lock(); do { exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); @@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, goto err1; } - if (skip_expect) { - rcu_read_unlock(); + if (skip_expect) return NF_ACCEPT; - } rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) @@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, err2: nf_ct_expect_put(rtp_exp); err1: - rcu_read_unlock(); return ret; } diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 400e9ae971533..7ce9e86d374c2 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -505,7 +505,6 @@ ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; - rcu_read_lock(); list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -521,7 +520,6 @@ ctnl_timeout_find_get(struct net *net, const char *name) break; } err: - rcu_read_unlock(); return matching; } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 16fa04086880c..7c543bfbf624f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -928,7 +928,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net) unsigned int instances = 0; int i; - rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; @@ -938,7 +937,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net) instances++; } } - rcu_read_unlock(); return instances; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index c64aca611ac5c..9dae4d665965e 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net, memset(fl6, 0, sizeof(*fl6)); fl6->daddr = ipv6_hdr(skb)->saddr; } - rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) ai->route(net, (struct dst_entry **)&rt, &fl, false); - rcu_read_unlock(); if (rt != NULL) { mtu = dst_mtu(&rt->dst); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index d767e35fff6bd..2b74f37132fc2 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) return user_laddr; laddr = 0; - rcu_read_lock(); indev = __in_dev_get_rcu(skb->dev); for_primary_ifa(indev) { laddr = ifa->ifa_local; break; } endfor_ifa(indev); - rcu_read_unlock(); return laddr ? laddr : daddr; } @@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, return user_laddr; laddr = NULL; - rcu_read_lock(); indev = __in6_dev_get(skb->dev); if (indev) { read_lock_bh(&indev->lock); @@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, } read_unlock_bh(&indev->lock); } - rcu_read_unlock(); return laddr ? laddr : daddr; } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index e329dabde35f5..3b2be2ae69875 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - rcu_read_lock(); - afinfo = nf_get_afinfo(NFPROTO_IPV6); if (afinfo != NULL) { const struct nf_ipv6_ops *v6ops; @@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, } else { route_err = 1; } - rcu_read_unlock(); if (route_err) return XT_ADDRTYPE_UNREACHABLE; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index b8fd4ab762edb..97589b8a2a40b 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -144,7 +144,6 @@ static unsigned int check_hlist(struct net *net, unsigned int length = 0; *addit = true; - rcu_read_lock(); /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { @@ -179,8 +178,6 @@ static unsigned int check_hlist(struct net *net, length++; } - rcu_read_unlock(); - return length; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 762e1874f28b7..ffdb611e54a26 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -659,12 +659,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; - rcu_read_lock_bh(); + local_bh_disable(); dh = dsthash_find(hinfo, &dst); if (dh == NULL) { dh = dsthash_alloc_init(hinfo, &dst, &race); if (dh == NULL) { - rcu_read_unlock_bh(); + local_bh_enable(); goto hotdrop; } else if (race) { /* Already got an entry, update expiration timeout */ @@ -689,12 +689,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, /* below the limit */ dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); - rcu_read_unlock_bh(); + local_bh_enable(); return !(cfg->mode & XT_HASHLIMIT_INVERT); } spin_unlock(&dh->lock); - rcu_read_unlock_bh(); + local_bh_enable(); /* default match is underlimit - so over the limit, we need to invert */ return cfg->mode & XT_HASHLIMIT_INVERT; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 71cfa9551d083..36e14b1f061dd 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) sizeof(struct tcphdr), optsize, opts); } - rcu_read_lock(); list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { int foptsize, optnum; @@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) info->loglevel == XT_OSF_LOGLEVEL_FIRST) break; } - rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p), From 784b4e612d42a2b7578d7fab2ed78940e10536bc Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 19 Jul 2017 16:32:23 +0200 Subject: [PATCH 04/47] netfilter: nf_tables: Attach process info to NFT_MSG_NEWGEN notifications This is helpful for 'nft monitor' to track which process caused a given change to the ruleset. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 683f6f88fcace..6f0a950e21c39 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1221,6 +1221,8 @@ enum nft_objref_attributes { enum nft_gen_attributes { NFTA_GEN_UNSPEC, NFTA_GEN_ID, + NFTA_GEN_PROC_PID, + NFTA_GEN_PROC_NAME, __NFTA_GEN_MAX }; #define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7fbf0070aba1c..b77ad0813564b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4657,6 +4657,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; + char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); @@ -4668,7 +4669,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) || + nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || + nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; nlmsg_end(skb, nlh); From f347ec852c7a83e1803192d2c1fce4e42e0715a5 Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Fri, 21 Jul 2017 01:54:37 +0200 Subject: [PATCH 05/47] netfilter: nf_tables: fib: use skb_header_pointer This is a preparatory patch for adding fib support to the netdev family. The netdev family receives the packets from ingress hook. At this point we have no guarantee that the ip header is linear. So this patch replaces ip_hdr with skb_header_pointer in order to address that possible situation. Signed-off-by: Pablo M. Bermudo Garay Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_fib_ipv4.c | 20 ++++++++++++++++---- net/ipv6/netfilter/nft_fib_ipv6.c | 29 +++++++++++++++++++++++------ 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index de3681df2ce71..e50976e3c2133 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -32,9 +32,10 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); u32 *dst = ®s->data[priv->dreg]; const struct net_device *dev = NULL; - const struct iphdr *iph; + struct iphdr *iph, _iph; __be32 addr; if (priv->flags & NFTA_FIB_F_IIF) @@ -42,7 +43,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - iph = ip_hdr(pkt->skb); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + if (priv->flags & NFTA_FIB_F_DADDR) addr = iph->daddr; else @@ -61,8 +67,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); u32 *dest = ®s->data[priv->dreg]; - const struct iphdr *iph; + struct iphdr *iph, _iph; struct fib_result res; struct flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, @@ -95,7 +102,12 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - iph = ip_hdr(pkt->skb); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 43f91d9b086c7..54b5899543ef5 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -25,9 +25,9 @@ static int get_ifindex(const struct net_device *dev) static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, const struct nft_pktinfo *pkt, - const struct net_device *dev) + const struct net_device *dev, + struct ipv6hdr *iph) { - const struct ipv6hdr *iph = ipv6_hdr(pkt->skb); int lookup_flags = 0; if (priv->flags & NFTA_FIB_F_DADDR) { @@ -55,7 +55,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, } static u32 __nft_fib6_eval_type(const struct nft_fib *priv, - const struct nft_pktinfo *pkt) + const struct nft_pktinfo *pkt, + struct ipv6hdr *iph) { const struct net_device *dev = NULL; const struct nf_ipv6_ops *v6ops; @@ -77,7 +78,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - nft_fib6_flowi_init(&fl6, priv, pkt, dev); + nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); v6ops = nf_get_ipv6_ops(); if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) @@ -131,9 +132,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); u32 *dest = ®s->data[priv->dreg]; + struct ipv6hdr *iph, _iph; - *dest = __nft_fib6_eval_type(priv, pkt); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + + *dest = __nft_fib6_eval_type(priv, pkt, iph); } EXPORT_SYMBOL_GPL(nft_fib6_eval_type); @@ -141,8 +150,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); const struct net_device *oif = NULL; u32 *dest = ®s->data[priv->dreg]; + struct ipv6hdr *iph, _iph; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, @@ -155,7 +166,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, else if (priv->flags & NFTA_FIB_F_OIF) oif = nft_out(pkt); - lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { From 6392c226037c2b90d3062126c65fc354e47156f7 Mon Sep 17 00:00:00 2001 From: "Pablo M. Bermudo Garay" Date: Fri, 21 Jul 2017 01:54:38 +0200 Subject: [PATCH 06/47] netfilter: nf_tables: add fib expression to the netdev family Add fib expression support for netdev family. Like inet family, netdev delegates the actual decision to the corresponding backend, either ipv4 or ipv6. This allows to perform very early reverse path filtering, among other things. You can find more information about fib expression in the f6d0cbcf09c5 ("") commit message. Signed-off-by: Pablo M. Bermudo Garay Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 9 ++++ net/netfilter/Makefile | 1 + net/netfilter/nft_fib_netdev.c | 87 ++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+) create mode 100644 net/netfilter/nft_fib_netdev.c diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9b28864cc36a9..e4a13cc8a2e76 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -636,6 +636,15 @@ config NFT_FWD_NETDEV help This option enables packet forwarding for the "netdev" family. +config NFT_FIB_NETDEV + depends on NFT_FIB_IPV4 + depends on NFT_FIB_IPV6 + tristate "Netfilter nf_tables netdev fib lookups support" + help + This option allows using the FIB expression from the netdev table. + The lookup will be delegated to the IPv4 or IPv6 FIB depending + on the protocol of the packet. + endif # NF_TABLES_NETDEV endif # NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 9133809193014..d3891c93edd6e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_FIB) += nft_fib.o obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o +obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c new file mode 100644 index 0000000000000..3997ee36cfbd5 --- /dev/null +++ b/net/netfilter/nft_fib_netdev.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 Pablo M. Bermudo Garay + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This code is based on net/netfilter/nft_fib_inet.c, written by + * Florian Westphal . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static void nft_fib_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + switch (ntohs(pkt->skb->protocol)) { + case ETH_P_IP: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib4_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib4_eval_type(expr, regs, pkt); + } + break; + case ETH_P_IPV6: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib6_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib6_eval_type(expr, regs, pkt); + } + break; + } + + regs->verdict.code = NFT_BREAK; +} + +static struct nft_expr_type nft_fib_netdev_type; +static const struct nft_expr_ops nft_fib_netdev_ops = { + .type = &nft_fib_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib_netdev_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static struct nft_expr_type nft_fib_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "fib", + .ops = &nft_fib_netdev_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fib_netdev_module_init(void) +{ + return nft_register_expr(&nft_fib_netdev_type); +} + +static void __exit nft_fib_netdev_module_exit(void) +{ + nft_unregister_expr(&nft_fib_netdev_type); +} + +module_init(nft_fib_netdev_module_init); +module_exit(nft_fib_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo M. Bermudo Garay "); +MODULE_ALIAS_NFT_AF_EXPR(5, "fib"); From a232cd0e0cf1ede77b047bc0c98142cd51f318e3 Mon Sep 17 00:00:00 2001 From: "subashab@codeaurora.org" Date: Thu, 20 Jul 2017 19:42:19 -0600 Subject: [PATCH 07/47] netfilter: conntrack: Change to deferable work queue Delayed workqueue causes wakeups to idle CPUs. This was causing a power impact for devices. Use deferable work queue instead so that gc_worker runs when CPU is active only. Signed-off-by: Subash Abhinov Kasiviswanathan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 69746928cc0a7..c6f1cf0bff569 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1081,7 +1081,7 @@ static void gc_worker(struct work_struct *work) static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { - INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); gc_work->next_gc_run = HZ; gc_work->exiting = false; } From ac7b848390036dadd4351899d2a23748075916bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:31 +0200 Subject: [PATCH 08/47] netfilter: expect: add and use nf_ct_expect_iterate helpers We have several spots that open-code a expect walk, add a helper that is similar to nf_ct_iterate_destroy/nf_ct_iterate_cleanup. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_expect.h | 5 ++ net/netfilter/nf_conntrack_expect.c | 54 ++++++++++++++++++ net/netfilter/nf_conntrack_helper.c | 34 +++++------ net/netfilter/nf_conntrack_netlink.c | 63 +++++++-------------- 4 files changed, 95 insertions(+), 61 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 2ba54feaccd8d..818def0111101 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -107,6 +107,11 @@ void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); bool nf_ct_remove_expect(struct nf_conntrack_expect *exp); +void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), void *data); +void nf_ct_expect_iterate_net(struct net *net, + bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data, u32 portid, int report); + /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 2c63808bea96e..dad2c0c22ad58 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -474,6 +474,60 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); +void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data) +{ + struct nf_conntrack_expect *exp; + const struct hlist_node *next; + unsigned int i; + + spin_lock_bh(&nf_conntrack_expect_lock); + + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, next, + &nf_ct_expect_hash[i], + hnode) { + if (iter(exp, data) && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } + } + } + + spin_unlock_bh(&nf_conntrack_expect_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy); + +void nf_ct_expect_iterate_net(struct net *net, + bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data, + u32 portid, int report) +{ + struct nf_conntrack_expect *exp; + const struct hlist_node *next; + unsigned int i; + + spin_lock_bh(&nf_conntrack_expect_lock); + + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, next, + &nf_ct_expect_hash[i], + hnode) { + + if (!net_eq(nf_ct_exp_net(exp), net)) + continue; + + if (iter(exp, data) && del_timer(&exp->timeout)) { + nf_ct_unlink_expect_report(exp, portid, report); + nf_ct_expect_put(exp); + } + } + } + + spin_unlock_bh(&nf_conntrack_expect_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net); + #ifdef CONFIG_NF_CONNTRACK_PROCFS struct ct_expect_iter_state { struct seq_net_private p; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9129bb3b51535..551a1eddf0fab 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -437,12 +437,22 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { - struct nf_conntrack_expect *exp; - const struct hlist_node *next; - unsigned int i; + struct nf_conn_help *help = nfct_help(exp->master); + const struct nf_conntrack_helper *me = data; + const struct nf_conntrack_helper *this; + + if (exp->helper == me) + return true; + this = rcu_dereference_protected(help->helper, + lockdep_is_held(&nf_conntrack_expect_lock)); + return this == me; +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ mutex_lock(&nf_ct_helper_mutex); hlist_del_rcu(&me->hnode); nf_ct_helper_count--; @@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); - /* Get rid of expectations */ - spin_lock_bh(&nf_conntrack_expect_lock); - for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, next, - &nf_ct_expect_hash[i], hnode) { - struct nf_conn_help *help = nfct_help(exp->master); - if ((rcu_dereference_protected( - help->helper, - lockdep_is_held(&nf_conntrack_expect_lock) - ) == me || exp->helper == me)) - nf_ct_remove_expect(exp); - } - } - spin_unlock_bh(&nf_conntrack_expect_lock); - + nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4dba71de4de79..4922c8aefb2a1 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2898,6 +2898,21 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl, return err == -EAGAIN ? -ENOBUFS : err; } +static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) +{ + const struct nf_conn_help *m_help; + const char *name = data; + + m_help = nfct_help(exp->master); + + return strcmp(m_help->helper->name, name) == 0; +} + +static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data) +{ + return true; +} + static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[], @@ -2906,10 +2921,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_zone zone; - unsigned int i; int err; if (cda[CTA_EXPECT_TUPLE]) { @@ -2949,49 +2962,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, nf_ct_expect_put(exp); } else if (cda[CTA_EXPECT_HELP_NAME]) { char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]); - struct nf_conn_help *m_help; - /* delete all expectations for this helper */ - spin_lock_bh(&nf_conntrack_expect_lock); - for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, next, - &nf_ct_expect_hash[i], - hnode) { - - if (!net_eq(nf_ct_exp_net(exp), net)) - continue; - - m_help = nfct_help(exp->master); - if (!strcmp(m_help->helper->name, name) && - del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - nf_ct_expect_put(exp); - } - } - } - spin_unlock_bh(&nf_conntrack_expect_lock); + nf_ct_expect_iterate_net(net, expect_iter_name, name, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } else { /* This basically means we have to flush everything*/ - spin_lock_bh(&nf_conntrack_expect_lock); - for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, next, - &nf_ct_expect_hash[i], - hnode) { - - if (!net_eq(nf_ct_exp_net(exp), net)) - continue; - - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - nf_ct_expect_put(exp); - } - } - } - spin_unlock_bh(&nf_conntrack_expect_lock); + nf_ct_expect_iterate_net(net, expect_iter_all, NULL, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } return 0; From 84657984c26fd0b64743a397f3a1a587fa4b575a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:32 +0200 Subject: [PATCH 09/47] netfilter: add and use nf_ct_unconfirmed_destroy This also removes __nf_ct_unconfirmed_destroy() call from nf_ct_iterate_cleanup_net, so that function can be used only when missing conntracks from unconfirmed list isn't a problem. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ net/netfilter/nf_conntrack_core.c | 15 +++++++++++---- net/netfilter/nfnetlink_cttimeout.c | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 48407569585da..6e6f678aaac71 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -224,6 +224,9 @@ extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); +/* Set all unconfirmed conntrack as dying */ +void nf_ct_unconfirmed_destroy(struct net *); + /* Iterate over all conntracks: if iter returns true, it's deleted. */ void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c6f1cf0bff569..80ab4e937765d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1686,6 +1686,17 @@ __nf_ct_unconfirmed_destroy(struct net *net) } } +void nf_ct_unconfirmed_destroy(struct net *net) +{ + might_sleep(); + + if (atomic_read(&net->ct.count) > 0) { + __nf_ct_unconfirmed_destroy(net); + synchronize_net(); + } +} +EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy); + void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report) @@ -1697,14 +1708,10 @@ void nf_ct_iterate_cleanup_net(struct net *net, if (atomic_read(&net->ct.count) == 0) return; - __nf_ct_unconfirmed_destroy(net); - d.iter = iter; d.data = data; d.net = net; - synchronize_net(); - nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); } EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 7ce9e86d374c2..f4fb6d4dd0b9f 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -570,6 +570,7 @@ static void __net_exit cttimeout_net_exit(struct net *net) { struct ctnl_timeout *cur, *tmp; + nf_ct_unconfirmed_destroy(net); ctnl_untimeout(net, NULL); list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { From e2a750070aeec7af3818065b39d61cb38627ce64 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:33 +0200 Subject: [PATCH 10/47] netfilter: conntrack: destroy functions need to free queued packets queued skbs might be using conntrack extensions that are being removed, such as timeout. This happens for skbs that have a skb->nfct in unconfirmed state (i.e., not in hash table yet). This is destructive, but there are only two use cases: - module removal (rare) - netns cleanup (most likely no conntracks exist, and if they do, they are removed anyway later on). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 4 ++++ net/netfilter/nf_queue.c | 1 + 2 files changed, 5 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 80ab4e937765d..2bc4991861869 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -56,6 +56,8 @@ #include #include +#include "nf_internals.h" + #define NF_CONNTRACK_VERSION "0.5.0" int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, @@ -1692,6 +1694,7 @@ void nf_ct_unconfirmed_destroy(struct net *net) if (atomic_read(&net->ct.count) > 0) { __nf_ct_unconfirmed_destroy(net); + nf_queue_nf_hook_drop(net); synchronize_net(); } } @@ -1737,6 +1740,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) if (atomic_read(&net->ct.count) == 0) continue; __nf_ct_unconfirmed_destroy(net); + nf_queue_nf_hook_drop(net); } rtnl_unlock(); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 043850c9d154d..4f4d80a58fb5f 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -109,6 +109,7 @@ unsigned int nf_queue_nf_hook_drop(struct net *net) return count; } +EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop); static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, struct nf_hook_entry *hook_entry, unsigned int queuenum) From 5da773a3e81e6093c4346ee8cd356fc214d7c76c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 00:02:34 +0200 Subject: [PATCH 11/47] netfilter: nfnetlink_queue: don't queue dying conntracks to userspace When skb is queued to userspace it leaves softirq/rcu protection. skb->nfct (via conntrack extensions such as helper) could then reference modules that no longer exist if the conntrack was not yet confirmed. nf_ct_iterate_destroy() will set the DYING bit for unconfirmed conntracks, we therefore solve this race as follows: 1. take the queue spinlock. 2. check if the conntrack is unconfirmed and has dying bit set. In this case, we must discard skb while we're still inside rcu read-side section. 3. If nf_ct_iterate_destroy() is called right after the packet is queued to userspace, it will be removed from the queue via nf_ct_iterate_destroy -> nf_queue_nf_hook_drop. When userspace sends the verdict (nfnetlink takes rcu read lock), there are two cases to consider: 1. nf_ct_iterate_destroy() was called while packet was out. In this case, skb will have been removed from the queue already and no reinject takes place as we won't find a matching entry for the packet id. 2. nf_ct_iterate_destroy() gets called right after verdict callback found and removed the skb from queue list. In this case, skb->nfct is marked as dying but it is still valid. The skb will be dropped either in nf_conntrack_confirm (we don't insert DYING conntracks into hash table) or when we try to queue the skb again, but either events don't occur before the rcu read lock is dropped. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 7c543bfbf624f..c9796629858f7 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -41,6 +41,10 @@ #include "../bridge/br_private.h" #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif + #define NFQNL_QMAX_DEFAULT 1024 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len @@ -612,6 +616,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, return NULL; } +static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; + const struct nf_conn *ct = (void *)skb_nfct(entry->skb); + + if (ct && ((ct->status & flags) == IPS_DYING)) + return true; +#endif + return false; +} + static int __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry) @@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, } spin_lock_bh(&queue->lock); + if (nf_ct_drop_unconfirmed(entry)) + goto err_out_free_nskb; + if (queue->queue_total >= queue->queue_maxlen) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; From 591bb2789bc2a93f379b13d277f441f1b427102d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 26 Jul 2017 11:40:52 +0200 Subject: [PATCH 12/47] netfilter: nf_hook_ops structs can be const We no longer place these on a list so they can be const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- drivers/net/ipvlan/ipvlan_main.c | 2 +- net/bridge/br_netfilter_hooks.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/decnet/netfilter/dn_rtmsg.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_SYNPROXY.c | 2 +- net/ipv4/netfilter/iptable_nat.c | 2 +- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 2 +- net/ipv6/ila/ila_xlat.c | 2 +- net/ipv6/netfilter/ip6t_SYNPROXY.c | 2 +- net/ipv6/netfilter/ip6table_nat.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 2 +- security/selinux/hooks.c | 2 +- security/smack/smack_netfilter.c | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index fdde207354169..943e6907dc19e 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -15,7 +15,7 @@ struct ipvlan_netns { unsigned int ipvl_nf_hook_refcnt; }; -static struct nf_hook_ops ipvl_nfops[] __read_mostly = { +static const struct nf_hook_ops ipvl_nfops[] = { { .hook = ipvlan_nf_input, .pf = NFPROTO_IPV4, diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2261e5194c82c..626f4b2cef164 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable); /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ -static struct nf_hook_ops br_nf_ops[] __read_mostly = { +static const struct nf_hook_ops br_nf_ops[] = { { .hook = br_nf_pre_routing, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index f22ef7c219137..45a00dbdbcad6 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -70,7 +70,7 @@ ebt_out_hook(void *priv, struct sk_buff *skb, return ebt_do_table(skb, state, state->net->xt.frame_filter); } -static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { +static const struct nf_hook_ops ebt_ops_filter[] = { { .hook = ebt_in_hook, .pf = NFPROTO_BRIDGE, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 2f7a4f3144063..4ecf50662b7d8 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -70,7 +70,7 @@ ebt_nat_out(void *priv, struct sk_buff *skb, return ebt_do_table(skb, state, state->net->xt.frame_nat); } -static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { +static const struct nf_hook_ops ebt_ops_nat[] = { { .hook = ebt_nat_out, .pf = NFPROTO_BRIDGE, diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index aa8ffecc46a43..ab395e55cd789 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EINVAL); } -static struct nf_hook_ops dnrmg_ops __read_mostly = { +static const struct nf_hook_ops dnrmg_ops = { .hook = dnrmg_hook, .pf = NFPROTO_DECNET, .hooknum = NF_DN_ROUTE, diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d72decb80f9f..6637e8b37ee22 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -624,7 +624,7 @@ arp_mangle(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops cip_arp_ops __read_mostly = { +static const struct nf_hook_ops cip_arp_ops = { .hook = arp_mangle, .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index f1528f7175a8c..811689e523c31 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -416,7 +416,7 @@ static unsigned int ipv4_synproxy_hook(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { +static const struct nf_hook_ops ipv4_synproxy_ops[] = { { .hook = ipv4_synproxy_hook, .pf = NFPROTO_IPV4, diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 138a24bc76ad9..a1a07b338ccfd 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -67,7 +67,7 @@ static unsigned int iptable_nat_ipv4_local_fn(void *priv, return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain); } -static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { +static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = iptable_nat_ipv4_in, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2e14ed11a35cf..63e4ea0e01f87 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -174,7 +174,7 @@ static unsigned int ipv4_conntrack_local(void *priv, /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ -static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { +static const struct nf_hook_ops ipv4_conntrack_ops[] = { { .hook = ipv4_conntrack_in, .pf = NFPROTO_IPV4, diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 346bf7ccac088..37fe1616ca0bc 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -90,7 +90,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ipv4_defrag_ops[] = { +static const struct nf_hook_ops ipv4_defrag_ops[] = { { .hook = ipv4_conntrack_defrag, .pf = NFPROTO_IPV4, diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 77f7f8c7d93d6..5bd419c1abc8b 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -208,7 +208,7 @@ ila_nf_input(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { +static const struct nf_hook_ops ila_nf_hook_ops[] = { { .hook = ila_nf_input, .pf = NFPROTO_IPV6, diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index ce203dd729e06..a5cd43d75393d 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -438,7 +438,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { +static const struct nf_hook_ops ipv6_synproxy_ops[] = { { .hook = ipv6_synproxy_hook, .pf = NFPROTO_IPV6, diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 7d2bd940291fd..991512576c8c8 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -69,7 +69,7 @@ static unsigned int ip6table_nat_local_fn(void *priv, return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain); } -static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { +static const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_in, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4e34024868334..f2d2f4a9294b7 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -191,7 +191,7 @@ static unsigned int ipv6_conntrack_local(void *priv, return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } -static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { +static const struct nf_hook_ops ipv6_conntrack_ops[] = { { .hook = ipv6_conntrack_in, .pf = NFPROTO_IPV6, diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index ada60d1a991b7..b326da59257f6 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(void *priv, return err == 0 ? NF_ACCEPT : NF_DROP; } -static struct nf_hook_ops ipv6_defrag_ops[] = { +static const struct nf_hook_ops ipv6_defrag_ops[] = { { .hook = ipv6_defrag, .pf = NFPROTO_IPV6, diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 2ff9d9070c955..5cb7cac9177d8 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -2101,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, #endif -static struct nf_hook_ops ip_vs_ops[] __read_mostly = { +static const struct nf_hook_ops ip_vs_ops[] = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 33fd061305c40..2f2e1338cd3d7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6530,7 +6530,7 @@ security_initcall(selinux_init); #if defined(CONFIG_NETFILTER) -static struct nf_hook_ops selinux_nf_ops[] = { +static const struct nf_hook_ops selinux_nf_ops[] = { { .hook = selinux_ipv4_postroute, .pf = NFPROTO_IPV4, diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c index cdeb0f3243dd6..e36d17835d4ff 100644 --- a/security/smack/smack_netfilter.c +++ b/security/smack/smack_netfilter.c @@ -58,7 +58,7 @@ static unsigned int smack_ipv4_output(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops smack_nf_ops[] = { +static const struct nf_hook_ops smack_nf_ops[] = { { .hook = smack_ipv4_output, .pf = NFPROTO_IPV4, From 6e692678d74289d6129bbd4bb20bb9fe01278faa Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:39 +0200 Subject: [PATCH 13/47] netfilter: nf_tables: No need to check chain existence when tracing nft_trace_notify() is called only from __nft_trace_packet(), which assigns its parameter 'chain' to info->chain. __nft_trace_packet() in turn later dereferences 'chain' unconditionally, which indicates that it's never NULL. Same does nft_do_chain(), the only user of the tracing infrastructure. Hence it is safe to assume the check removed here is not needed. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_trace.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index e1b15e7a5793f..0c3a0049e4aa2 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -217,14 +217,11 @@ void nft_trace_notify(struct nft_traceinfo *info) if (trace_fill_id(skb, pkt->skb)) goto nla_put_failure; - if (info->chain) { - if (nla_put_string(skb, NFTA_TRACE_CHAIN, - info->chain->name)) - goto nla_put_failure; - if (nla_put_string(skb, NFTA_TRACE_TABLE, - info->chain->table->name)) - goto nla_put_failure; - } + if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) + goto nla_put_failure; + + if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name)) + goto nla_put_failure; if (nf_trace_fill_rule_info(skb, info)) goto nla_put_failure; From 2cf0c8b3e6942ecafe6ebb1a6d0328a81641bf39 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:40 +0200 Subject: [PATCH 14/47] netlink: Introduce nla_strdup() This is similar to strdup() for netlink string attributes. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netlink.h | 1 + lib/nlattr.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/net/netlink.h b/include/net/netlink.h index ef8e6c3a80a63..c8c2eb5ae55ef 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -247,6 +247,7 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int nla_policy_len(const struct nla_policy *, int); struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype); size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); +char *nla_strdup(const struct nlattr *nla, gfp_t flags); int nla_memcpy(void *dest, const struct nlattr *src, int count); int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); int nla_strcmp(const struct nlattr *nla, const char *str); diff --git a/lib/nlattr.c b/lib/nlattr.c index fb52435be42dd..f13013f7e21a1 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -271,6 +271,30 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) } EXPORT_SYMBOL(nla_strlcpy); +/** + * nla_strdup - Copy string attribute payload into a newly allocated buffer + * @nla: attribute to copy the string from + * @flags: the type of memory to allocate (see kmalloc). + * + * Returns a pointer to the allocated buffer or NULL on error. + */ +char *nla_strdup(const struct nlattr *nla, gfp_t flags) +{ + size_t srclen = nla_len(nla); + char *src = nla_data(nla), *dst; + + if (srclen > 0 && src[srclen - 1] == '\0') + srclen--; + + dst = kmalloc(srclen + 1, flags); + if (dst != NULL) { + memcpy(dst, src, srclen); + dst[srclen] = '\0'; + } + return dst; +} +EXPORT_SYMBOL(nla_strdup); + /** * nla_memcpy - Copy a netlink attribute into another memory area * @dest: where to copy to memcpy From e46abbcc05aa8a16b0e7f5c94e86d11af9aa2770 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:41 +0200 Subject: [PATCH 15/47] netfilter: nf_tables: Allow table names of up to 255 chars Allocate all table names dynamically to allow for arbitrary lengths but introduce NFT_NAME_MAXLEN as an upper sanity boundary. It's value was chosen to allow using a domain name as per RFC 1035. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 3 +- net/netfilter/nf_tables_api.c | 49 +++++++++++++++++------- net/netfilter/nf_tables_trace.c | 2 +- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index bd5be0d691d51..05ecf78ec0787 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -957,7 +957,7 @@ struct nft_table { u32 use; u16 flags:14, genmask:2; - char name[NFT_TABLE_MAXNAMELEN]; + char *name; }; enum nft_af_flags { diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 6f0a950e21c39..0b94e572ef166 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1,7 +1,8 @@ #ifndef _LINUX_NF_TABLES_H #define _LINUX_NF_TABLES_H -#define NFT_TABLE_MAXNAMELEN 32 +#define NFT_NAME_MAXLEN 256 +#define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN 32 #define NFT_SET_MAXNAMELEN 32 #define NFT_OBJ_MAXNAMELEN 32 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b77ad0813564b..c2e392d5e512f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (table == NULL) goto err2; - nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); + table->name = nla_strdup(name, GFP_KERNEL); + if (table->name == NULL) + goto err3; + INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); @@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) - goto err3; + goto err4; list_add_tail_rcu(&table->list, &afi->tables); return 0; +err4: + kfree(table->name); err3: kfree(table); err2: @@ -865,6 +870,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); + kfree(ctx->table->name); kfree(ctx->table); module_put(ctx->afi->owner); } @@ -1972,7 +1978,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, } struct nft_rule_dump_ctx { - char table[NFT_TABLE_MAXNAMELEN]; + char *table; char chain[NFT_CHAIN_MAXNAMELEN]; }; @@ -1997,7 +2003,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(table, &afi->tables, list) { - if (ctx && ctx->table[0] && + if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) continue; @@ -2037,7 +2043,12 @@ static int nf_tables_dump_rules(struct sk_buff *skb, static int nf_tables_dump_rules_done(struct netlink_callback *cb) { - kfree(cb->data); + struct nft_rule_dump_ctx *ctx = cb->data; + + if (ctx) { + kfree(ctx->table); + kfree(ctx); + } return 0; } @@ -2069,9 +2080,14 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (!ctx) return -ENOMEM; - if (nla[NFTA_RULE_TABLE]) - nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE], - sizeof(ctx->table)); + if (nla[NFTA_RULE_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], + GFP_KERNEL); + if (!ctx->table) { + kfree(ctx); + return -ENOMEM; + } + } if (nla[NFTA_RULE_CHAIN]) nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN], sizeof(ctx->chain)); @@ -4410,7 +4426,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, } struct nft_obj_filter { - char table[NFT_OBJ_MAXNAMELEN]; + char *table; u32 type; }; @@ -4475,7 +4491,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) static int nf_tables_dump_obj_done(struct netlink_callback *cb) { - kfree(cb->data); + struct nft_obj_filter *filter = cb->data; + + kfree(filter->table); + kfree(filter); return 0; } @@ -4489,9 +4508,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[]) if (!filter) return ERR_PTR(-ENOMEM); - if (nla[NFTA_OBJ_TABLE]) - nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE], - NFT_TABLE_MAXNAMELEN); + if (nla[NFTA_OBJ_TABLE]) { + filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL); + if (!filter->table) { + kfree(filter); + return ERR_PTR(-ENOMEM); + } + } if (nla[NFTA_OBJ_TYPE]) filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 0c3a0049e4aa2..62787d985e9d3 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -175,7 +175,7 @@ void nft_trace_notify(struct nft_traceinfo *info) return; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + - nla_total_size(NFT_TABLE_MAXNAMELEN) + + nla_total_size(strlen(info->chain->table->name)) + nla_total_size(NFT_CHAIN_MAXNAMELEN) + nla_total_size_64bit(sizeof(__be64)) + /* rule handle */ nla_total_size(sizeof(__be32)) + /* trace type */ From b7263e071aba736cea9e71cdf2e76dfa7aebd039 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:42 +0200 Subject: [PATCH 16/47] netfilter: nf_tables: Allow chain name of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 +-- include/uapi/linux/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 34 ++++++++++++++++++------ net/netfilter/nf_tables_trace.c | 27 +++++++++++++++++-- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 05ecf78ec0787..be1610162ee02 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -859,7 +859,7 @@ struct nft_chain { u16 level; u8 flags:6, genmask:2; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; }; enum nft_chain_type { @@ -1272,7 +1272,7 @@ struct nft_trans_set { struct nft_trans_chain { bool update; - char name[NFT_CHAIN_MAXNAMELEN]; + char *name; struct nft_stats __percpu *stats; u8 policy; }; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 0b94e572ef166..d9c03a8608ee3 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -3,7 +3,7 @@ #define NFT_NAME_MAXLEN 256 #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_CHAIN_MAXNAMELEN 32 +#define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_SET_MAXNAMELEN 32 #define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c2e392d5e512f..747499039709e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1250,8 +1250,10 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) static_branch_dec(&nft_counters_enabled); if (basechain->ops[0].dev != NULL) dev_put(basechain->ops[0].dev); + kfree(chain->name); kfree(basechain); } else { + kfree(chain->name); kfree(chain); } } @@ -1476,8 +1478,13 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, nft_trans_chain_policy(trans) = -1; if (nla[NFTA_CHAIN_HANDLE] && name) { - nla_strlcpy(nft_trans_chain_name(trans), name, - NFT_CHAIN_MAXNAMELEN); + nft_trans_chain_name(trans) = + nla_strdup(name, GFP_KERNEL); + if (!nft_trans_chain_name(trans)) { + kfree(trans); + free_percpu(stats); + return -ENOMEM; + } } list_add_tail(&trans->list, &net->nft.commit_list); return 0; @@ -1544,7 +1551,11 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->table = table; - nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + chain->name = nla_strdup(name, GFP_KERNEL); + if (!chain->name) { + err = -ENOMEM; + goto err1; + } err = nf_tables_register_hooks(net, table, chain, afi->nops); if (err < 0) @@ -1979,7 +1990,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, struct nft_rule_dump_ctx { char *table; - char chain[NFT_CHAIN_MAXNAMELEN]; + char *chain; }; static int nf_tables_dump_rules(struct sk_buff *skb, @@ -2047,6 +2058,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb) if (ctx) { kfree(ctx->table); + kfree(ctx->chain); kfree(ctx); } return 0; @@ -2088,9 +2100,15 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, return -ENOMEM; } } - if (nla[NFTA_RULE_CHAIN]) - nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN], - sizeof(ctx->chain)); + if (nla[NFTA_RULE_CHAIN]) { + ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], + GFP_KERNEL); + if (!ctx->chain) { + kfree(ctx->table); + kfree(ctx); + return -ENOMEM; + } + } c.data = ctx; } @@ -4863,7 +4881,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) { struct nft_base_chain *basechain; - if (nft_trans_chain_name(trans)[0]) + if (nft_trans_chain_name(trans)) strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); if (!nft_is_base_chain(trans->ctx.chain)) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 62787d985e9d3..e1dc527a493b8 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb, NFTA_TRACE_PAD); } +static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) +{ + switch (info->type) { + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + break; + default: + return false; + } + + switch (info->verdict->code) { + case NFT_JUMP: + case NFT_GOTO: + break; + default: + return false; + } + + return true; +} + void nft_trace_notify(struct nft_traceinfo *info) { const struct nft_pktinfo *pkt = info->pkt; @@ -176,12 +197,11 @@ void nft_trace_notify(struct nft_traceinfo *info) size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(strlen(info->chain->table->name)) + - nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(strlen(info->chain->name)) + nla_total_size_64bit(sizeof(__be64)) + /* rule handle */ nla_total_size(sizeof(__be32)) + /* trace type */ nla_total_size(0) + /* VERDICT, nested */ nla_total_size(sizeof(u32)) + /* verdict code */ - nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ nla_total_size(sizeof(u32)) + /* id */ nla_total_size(NFT_TRACETYPE_LL_HSIZE) + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + @@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info) nla_total_size(sizeof(u32)) + /* nfproto */ nla_total_size(sizeof(u32)); /* policy */ + if (nft_trace_have_verdict_chain(info)) + size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */ + skb = nlmsg_new(size, GFP_ATOMIC); if (!skb) return; From 387454901bd62022ac1b04e15bd8d4fcc60bbed4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:43 +0200 Subject: [PATCH 17/47] netfilter: nf_tables: Allow set names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 18 ++++++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index be1610162ee02..66ba62fa7d90e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -396,7 +396,7 @@ void nft_unregister_set(struct nft_set_type *type); struct nft_set { struct list_head list; struct list_head bindings; - char name[NFT_SET_MAXNAMELEN]; + char *name; u32 ktype; u32 dtype; u32 objtype; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index d9c03a8608ee3..b5e73e80b7b6a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -4,7 +4,7 @@ #define NFT_NAME_MAXLEN 256 #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_SET_MAXNAMELEN 32 +#define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_OBJ_MAXNAMELEN 32 #define NFT_USERDATA_MAXLEN 256 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 747499039709e..e6a07f27b1a33 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2650,7 +2650,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, unsigned long *inuse; unsigned int n = 0, min = 0; - p = strnchr(name, NFT_SET_MAXNAMELEN, '%'); + p = strchr(name, '%'); if (p != NULL) { if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; @@ -2681,7 +2681,10 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, free_page((unsigned long)inuse); } - snprintf(set->name, sizeof(set->name), name, min + n); + set->name = kasprintf(GFP_KERNEL, name, min + n); + if (!set->name) + return -ENOMEM; + list_for_each_entry(i, &ctx->table->sets, list) { if (!nft_is_active_next(ctx->net, i)) continue; @@ -2958,7 +2961,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; - char name[NFT_SET_MAXNAMELEN]; + char *name; unsigned int size; bool create; u64 timeout; @@ -3104,8 +3107,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, goto err1; } - nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); + name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto err2; + } + err = nf_tables_set_alloc_name(&ctx, set, name); + kfree(name); if (err < 0) goto err2; @@ -3155,6 +3164,7 @@ static void nft_set_destroy(struct nft_set *set) { set->ops->destroy(set); module_put(set->ops->type->owner); + kfree(set->name); kvfree(set); } From 615095752100748e221028fc96163c2b78185ae4 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 27 Jul 2017 16:56:44 +0200 Subject: [PATCH 18/47] netfilter: nf_tables: Allow object names of up to 255 chars Same conversion as for table names, use NFT_NAME_MAXLEN as upper boundary as well. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- include/uapi/linux/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 11 +++++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 66ba62fa7d90e..f9795fe394f31 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1016,7 +1016,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type, */ struct nft_object { struct list_head list; - char name[NFT_OBJ_MAXNAMELEN]; + char *name; struct nft_table *table; u32 genmask:2, use:30; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index b5e73e80b7b6a..be25cf69295b4 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -5,7 +5,7 @@ #define NFT_TABLE_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_CHAIN_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_SET_MAXNAMELEN NFT_NAME_MAXLEN -#define NFT_OBJ_MAXNAMELEN 32 +#define NFT_OBJ_MAXNAMELEN NFT_NAME_MAXLEN #define NFT_USERDATA_MAXLEN 256 /** diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e6a07f27b1a33..149785ff1c7b6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4402,15 +4402,21 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err1; } obj->table = table; - nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN); + obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); + if (!obj->name) { + err = -ENOMEM; + goto err2; + } err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; +err3: + kfree(obj->name); err2: if (obj->type->destroy) obj->type->destroy(obj); @@ -4626,6 +4632,7 @@ static void nft_obj_destroy(struct nft_object *obj) obj->type->destroy(obj); module_put(obj->type->owner); + kfree(obj->name); kfree(obj); } From 9b7e26aee7cf27ffb37bb2f17229cecc89a833bd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Jul 2017 10:34:42 +0200 Subject: [PATCH 19/47] netfilter: nft_set_rbtree: use seqcount to avoid lock in most cases switch to lockless lockup. write side now also increments sequence counter. On lookup, sample counter value and only take the lock if we did not find a match and the counter has changed. This avoids need to write to private area in normal (lookup) cases. In case we detect a writer (seqretry is true) we fall back to taking the readlock. The readlock is also used during dumps to ensure we get a consistent tree walk. Similar technique (rbtree+seqlock) was used by David Howells in rxrpc. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 49 +++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index bce5382f1d49d..d83a4ec5900d4 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -19,8 +19,9 @@ #include struct nft_rbtree { - rwlock_t lock; struct rb_root root; + rwlock_t lock; + seqcount_t count; }; struct nft_rbtree_elem { @@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; } -static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext, + unsigned int seq) { struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; @@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const void *this; int d; - read_lock_bh(&priv->lock); - parent = priv->root.rb_node; + parent = rcu_dereference_raw(priv->root.rb_node); while (parent != NULL) { + if (read_seqcount_retry(&priv->count, seq)) + return false; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); this = nft_set_ext_key(&rbe->ext); d = memcmp(this, key, set->klen); if (d < 0) { - parent = parent->rb_left; + parent = rcu_dereference_raw(parent->rb_left); if (interval && nft_rbtree_equal(set, this, interval) && nft_rbtree_interval_end(this) && @@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, continue; interval = rbe; } else if (d > 0) - parent = parent->rb_right; + parent = rcu_dereference_raw(parent->rb_right); else { if (!nft_set_elem_active(&rbe->ext, genmask)) { - parent = parent->rb_left; + parent = rcu_dereference_raw(parent->rb_left); continue; } if (nft_rbtree_interval_end(rbe)) goto out; - read_unlock_bh(&priv->lock); *ext = &rbe->ext; return true; @@ -84,15 +87,32 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && !nft_rbtree_interval_end(interval)) { - read_unlock_bh(&priv->lock); *ext = &interval->ext; return true; } out: - read_unlock_bh(&priv->lock); return false; } +static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_rbtree *priv = nft_set_priv(set); + unsigned int seq = read_seqcount_begin(&priv->count); + bool ret; + + ret = __nft_rbtree_lookup(net, set, key, ext, seq); + if (ret || !read_seqcount_retry(&priv->count, seq)) + return ret; + + read_lock_bh(&priv->lock); + seq = read_seqcount_begin(&priv->count); + ret = __nft_rbtree_lookup(net, set, key, ext, seq); + read_unlock_bh(&priv->lock); + + return ret; +} + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) @@ -130,7 +150,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } } } - rb_link_node(&new->node, parent, p); + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; } @@ -144,7 +164,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, int err; write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); err = __nft_rbtree_insert(net, set, rbe, ext); + write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); return err; @@ -158,7 +180,9 @@ static void nft_rbtree_remove(const struct net *net, struct nft_rbtree_elem *rbe = elem->priv; write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); rb_erase(&rbe->node, &priv->root); + write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); } @@ -264,6 +288,7 @@ static int nft_rbtree_init(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); rwlock_init(&priv->lock); + seqcount_init(&priv->count); priv->root = RB_ROOT; return 0; } From 4d3a57f23dec59f0a2362e63540b2d01b37afe0a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 28 Jul 2017 11:22:04 +0200 Subject: [PATCH 20/47] netfilter: conntrack: do not enable connection tracking unless needed Discussion during NFWS 2017 in Faro has shown that the current conntrack behaviour is unreasonable. Even if conntrack module is loaded on behalf of a single net namespace, its turned on for all namespaces, which is expensive. Commit 481fa373476 ("netfilter: conntrack: add nf_conntrack_default_on sysctl") attempted to provide an alternative to the 'default on' behaviour by adding a sysctl to change it. However, as Eric points out, the sysctl only becomes available once the module is loaded, and then its too late. So we either have to move the sysctl to the core, or, alternatively, change conntrack to become active only once the rule set requires this. This does the latter, conntrack is only enabled when a rule needs it. Reported-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../networking/nf_conntrack-sysctl.txt | 11 ------- include/net/netfilter/nf_conntrack_l3proto.h | 15 ---------- .../netfilter/nf_conntrack_l3proto_ipv4.c | 16 ++-------- .../netfilter/nf_conntrack_l3proto_ipv6.c | 17 ++--------- net/netfilter/nf_conntrack_proto.c | 29 ------------------- net/netfilter/nf_conntrack_standalone.c | 10 ------- 6 files changed, 4 insertions(+), 94 deletions(-) diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 497d668288f95..433b6724797ad 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -96,17 +96,6 @@ nf_conntrack_max - INTEGER Size of connection tracking table. Default value is nf_conntrack_buckets value * 4. -nf_conntrack_default_on - BOOLEAN - 0 - don't register conntrack in new net namespaces - 1 - register conntrack in new net namespaces (default) - - This controls wheter newly created network namespaces have connection - tracking enabled by default. It will be enabled automatically - regardless of this setting if the new net namespace requires - connection tracking, e.g. when NAT rules are created. - This setting is only visible in initial user namespace, it has no - effect on existing namespaces. - nf_conntrack_tcp_be_liberal - BOOLEAN 0 - disabled (default) not 0 - enabled diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6d14b36e3a490..1b8de164d7446 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -73,21 +73,6 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; -#ifdef CONFIG_SYSCTL -/* Protocol pernet registration. */ -int nf_ct_l3proto_pernet_register(struct net *net, - struct nf_conntrack_l3proto *proto); -#else -static inline int nf_ct_l3proto_pernet_register(struct net *n, - struct nf_conntrack_l3proto *p) -{ - return 0; -} -#endif - -void nf_ct_l3proto_pernet_unregister(struct net *net, - struct nf_conntrack_l3proto *proto); - /* Protocol global registration. */ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 63e4ea0e01f87..de5f0e6ddd1b1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -398,24 +398,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = { static int ipv4_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4, - ARRAY_SIZE(builtin_l4proto4)); - if (ret < 0) - return ret; - ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: pernet registration failed\n"); - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, - ARRAY_SIZE(builtin_l4proto4)); - } - return ret; + return nf_ct_l4proto_pernet_register(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); } static void ipv4_net_exit(struct net *net) { - nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index f2d2f4a9294b7..ddef5ee9e0a86 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -398,25 +398,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = { static int ipv6_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6, - ARRAY_SIZE(builtin_l4proto6)); - if (ret < 0) - return ret; - - ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, - ARRAY_SIZE(builtin_l4proto6)); - } - return ret; + return nf_ct_l4proto_pernet_register(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } static void ipv6_net_exit(struct net *net) { - nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 1dcad229c3cc7..7c89dade6fd3b 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -238,20 +238,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); -#ifdef CONFIG_SYSCTL -extern unsigned int nf_conntrack_default_on; - -int nf_ct_l3proto_pernet_register(struct net *net, - struct nf_conntrack_l3proto *proto) -{ - if (nf_conntrack_default_on == 0) - return 0; - - return proto->net_ns_get ? proto->net_ns_get(net) : 0; -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); -#endif - void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) { BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); @@ -270,21 +256,6 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); -void nf_ct_l3proto_pernet_unregister(struct net *net, - struct nf_conntrack_l3proto *proto) -{ - /* - * nf_conntrack_default_on *might* have registered hooks. - * ->net_ns_put must cope with more puts() than get(), i.e. - * if nf_conntrack_default_on was 0 at time of - * nf_ct_l3proto_pernet_register invocation this net_ns_put() - * should be a noop. - */ - if (proto->net_ns_put) - proto->net_ns_put(net); -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); - static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index ccb5cb9043e0e..5b6c675d55b14 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -452,9 +452,6 @@ static int log_invalid_proto_max __read_mostly = 255; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; -extern unsigned int nf_conntrack_default_on; -unsigned int nf_conntrack_default_on __read_mostly = 1; - static int nf_conntrack_hash_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -520,13 +517,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "nf_conntrack_default_on", - .data = &nf_conntrack_default_on, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; From 5b9ccdcb98429b7e5c814772e3d9448c76441d87 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 29 Jul 2017 20:31:00 +0900 Subject: [PATCH 21/47] netfilter: xtables: Remove unused variable in compat_copy_entry_from_user() The target variable is not used in the compat_copy_entry_from_user(). So It can be removed. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 2 -- net/ipv4/netfilter/ip_tables.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0bc3c3d73e61e..cf520d30cb94b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1117,7 +1117,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; - struct xt_target *target; struct arpt_entry *de; unsigned int origsize; int h; @@ -1132,7 +1131,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, de->target_offset = e->target_offset - (origsize - *size); t = compat_arpt_get_target(e); - target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2a55a40211cbf..f47e8dad5e955 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -1355,7 +1355,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; - struct xt_target *target; struct ipt_entry *de; unsigned int origsize; int h; @@ -1374,7 +1373,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); - target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); From 2a04aabf5c96c9e25df488949b21223bcc623815 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 1 Aug 2017 12:25:01 +0200 Subject: [PATCH 22/47] netfilter: constify nf_conntrack_l3/4proto parameters When a nf_conntrack_l3/4proto parameter is not on the left hand side of an assignment, its address is not taken, and it is not passed to a function that may modify its fields, then it can be declared as const. This change is useful from a documentation point of view, and can possibly facilitate making some nf_conntrack_l3/4proto structures const subsequently. Done with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 14 ++++++------ include/net/netfilter/nf_conntrack_timeout.h | 2 +- net/netfilter/nf_conntrack_core.c | 8 +++---- net/netfilter/nf_conntrack_netlink.c | 6 ++--- net/netfilter/nf_conntrack_proto.c | 24 ++++++++++---------- net/netfilter/nfnetlink_cttimeout.c | 5 ++-- 6 files changed, 30 insertions(+), 29 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7032e044bbe2a..b6e27cafb1d93 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -125,23 +125,23 @@ struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); +void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); /* Protocol pernet registration. */ int nf_ct_l4proto_pernet_register_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_pernet_unregister_one(struct net *net, - struct nf_conntrack_l4proto *proto); + const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *proto[], + struct nf_conntrack_l4proto *const proto[], unsigned int num_proto); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *proto[], - unsigned int num_proto); + struct nf_conntrack_l4proto *const proto[], + unsigned int num_proto); /* Protocol global registration. */ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *proto); -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *proto); +void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto[], unsigned int num_proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto[], diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index d40b89355fdd3..b222957062b56 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -68,7 +68,7 @@ struct nf_conn_timeout *nf_ct_timeout_ext_add(struct nf_conn *ct, static inline unsigned int * nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT struct nf_conn_timeout *timeout_ext; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2bc4991861869..f2f00eaf217da 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1176,8 +1176,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *l4proto, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff, u32 hash) { @@ -1288,8 +1288,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, - struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_l4proto *l4proto) { const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4922c8aefb2a1..f4ca48817f669 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -61,8 +61,8 @@ MODULE_LICENSE("GPL"); static char __initdata version[] = "0.93"; static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nlattr *nest_parms; @@ -86,7 +86,7 @@ static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto) + const struct nf_conntrack_l3proto *l3proto) { int ret = 0; struct nlattr *nest_parms; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 7c89dade6fd3b..27810cf816a6b 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -188,7 +188,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) } EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p) +void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p) { module_put(p->me); } @@ -257,7 +257,7 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { if (l4proto->get_net_proto) { /* statically built-in protocols use static per-net */ @@ -272,7 +272,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, static int nf_ct_l4proto_register_sysctl(struct net *net, struct nf_proto_net *pn, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { int err = 0; @@ -295,8 +295,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net, static void nf_ct_l4proto_unregister_sysctl(struct net *net, - struct nf_proto_net *pn, - struct nf_conntrack_l4proto *l4proto) + struct nf_proto_net *pn, + const struct nf_conntrack_l4proto *l4proto) { #ifdef CONFIG_SYSCTL if (pn->ctl_table_header != NULL) @@ -366,7 +366,7 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); int nf_ct_l4proto_pernet_register_one(struct net *net, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nf_proto_net *pn = NULL; @@ -391,7 +391,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); -static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)); @@ -404,7 +404,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) &nf_conntrack_l4proto_generic); } -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) { mutex_lock(&nf_ct_proto_mutex); __nf_ct_l4proto_unregister_one(l4proto); @@ -415,7 +415,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); void nf_ct_l4proto_pernet_unregister_one(struct net *net, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); @@ -449,7 +449,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *l4proto[], + struct nf_conntrack_l4proto *const l4proto[], unsigned int num_proto) { int ret = -EINVAL; @@ -485,8 +485,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *l4proto[], - unsigned int num_proto) + struct nf_conntrack_l4proto *const l4proto[], + unsigned int num_proto) { while (num_proto-- != 0) nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f4fb6d4dd0b9f..fcabccc99f0df 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { }; static int -ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, +ctnl_timeout_parse_policy(void *timeouts, + const struct nf_conntrack_l4proto *l4proto, struct net *net, const struct nlattr *attr) { int ret = 0; @@ -401,7 +402,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, static int cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; From 549d2d41c1a448380872858302ee91be5a3ed499 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 1 Aug 2017 12:48:03 +0200 Subject: [PATCH 23/47] netfilter: constify nf_loginfo structures The nf_loginfo structures are only passed as the seventh argument to nf_log_trace, which is declared as const or stored in a local const variable. Thus the nf_loginfo structures themselves can be const. Done with the help of Coccinelle. // @r disable optional_qualifier@ identifier i; position p; @@ static struct nf_loginfo i@p = { ... }; @ok1@ identifier r.i; expression list[6] es; position p; @@ nf_log_trace(es,&i@p,...) @ok2@ identifier r.i; const struct nf_loginfo *e; position p; @@ e = &i@p @bad@ position p != {r.p,ok1.p,ok2.p}; identifier r.i; struct nf_loginfo e; @@ e@i@p @depends on !bad disable optional_qualifier@ identifier r.i; @@ static +const struct nf_loginfo i = { ... }; // Signed-off-by: Julia Lawall Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ip_tables.c | 2 +- net/ipv4/netfilter/nf_log_arp.c | 2 +- net/ipv4/netfilter/nf_log_ipv4.c | 2 +- net/ipv6/netfilter/ip6_tables.c | 2 +- net/ipv6/netfilter/nf_log_ipv6.c | 2 +- net/netfilter/nf_tables_core.c | 2 +- net/netfilter/nfnetlink_log.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f47e8dad5e955..2aea896f57084 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -151,7 +151,7 @@ static const char *const comments[] = { [NF_IP_TRACE_COMMENT_POLICY] = "policy", }; -static struct nf_loginfo trace_loginfo = { +static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 2f3895ddc275d..df5c2a2061a4b 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -25,7 +25,7 @@ #include #include -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index c83a9963269bf..4388de0e5380c 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -24,7 +24,7 @@ #include #include -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1f90644056ac7..9f6644958e5e3 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -176,7 +176,7 @@ static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_POLICY] = "policy", }; -static struct nf_loginfo trace_loginfo = { +static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 97c724224da7b..b397a8fe88b93 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -25,7 +25,7 @@ #include #include -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index c5bab08b0d734..dfd0bf3810d2e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = { [NFT_TRACETYPE_RULE] = "rule", }; -static struct nf_loginfo trace_loginfo = { +static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index c684ba95dbb49..cad6498f10b03 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -606,7 +606,7 @@ __build_packet_message(struct nfnl_log_net *log, return -1; } -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_ULOG, .u = { .ulog = { From a2acc543408e1cbdcd7915a268cdbc451f09832a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 2 Aug 2017 18:14:43 +0900 Subject: [PATCH 24/47] netfilter: connlimit: merge root4 and root6. The root4 variable is used only when connlimit extension module has been stored by the iptables command. and the roo6 variable is used only when connlimit extension module has been stored by the ip6tables command. So the root4 and roo6 variable does not be used at the same time. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_connlimit.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 97589b8a2a40b..ffa8eec980e9e 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -58,8 +58,7 @@ struct xt_connlimit_rb { static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp; struct xt_connlimit_data { - struct rb_root climit_root4[CONNLIMIT_SLOTS]; - struct rb_root climit_root6[CONNLIMIT_SLOTS]; + struct rb_root climit_root[CONNLIMIT_SLOTS]; }; static u_int32_t connlimit_rnd __read_mostly; @@ -294,13 +293,11 @@ static int count_them(struct net *net, int count; u32 hash; - if (family == NFPROTO_IPV6) { + if (family == NFPROTO_IPV6) hash = connlimit_iphash6(addr, mask); - root = &data->climit_root6[hash]; - } else { + else hash = connlimit_iphash(addr->ip & mask->ip); - root = &data->climit_root4[hash]; - } + root = &data->climit_root[hash]; spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); @@ -379,10 +376,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) return -ENOMEM; } - for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) - info->data->climit_root4[i] = RB_ROOT; - for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) - info->data->climit_root6[i] = RB_ROOT; + for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) + info->data->climit_root[i] = RB_ROOT; return 0; } @@ -413,10 +408,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_netns_put(par->net, par->family); - for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) - destroy_tree(&info->data->climit_root4[i]); - for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) - destroy_tree(&info->data->climit_root6[i]); + for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) + destroy_tree(&info->data->climit_root[i]); kfree(info->data); } From 166327d79d8d44a1668eff0fda8286e9a193a251 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 3 Aug 2017 10:26:20 +0900 Subject: [PATCH 25/47] netfilter: remove prototype of netfilter_queue_init The netfilter_queue_init() has been removed. so we can remove the prototype of that. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_internals.h | 1 - 1 file changed, 1 deletion(-) diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index bfa742da83aff..19f00a47a7108 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -15,7 +15,6 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, struct nf_hook_entry **entryp, unsigned int verdict); unsigned int nf_queue_nf_hook_drop(struct net *net); -int __init netfilter_queue_init(void); /* nf_log.c */ int __init netfilter_log_init(void); From 46b20c38f37c48bbcb832f933e1bee7d951da99b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 7 Aug 2017 21:44:25 +0800 Subject: [PATCH 26/47] netfilter: use audit_log() Use audit_log() instead of open-coding it. Signed-off-by: Geliang Tang Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 13 ++++--------- net/netfilter/x_tables.c | 14 ++++---------- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 9c6e619f452bc..54c7ef4e970e1 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1069,15 +1069,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, #ifdef CONFIG_AUDIT if (audit_enabled) { - struct audit_buffer *ab; - - ab = audit_log_start(current->audit_context, GFP_KERNEL, - AUDIT_NETFILTER_CFG); - if (ab) { - audit_log_format(ab, "table=%s family=%u entries=%u", - repl->name, AF_BRIDGE, repl->nentries); - audit_log_end(ab); - } + audit_log(current->audit_context, GFP_KERNEL, + AUDIT_NETFILTER_CFG, + "table=%s family=%u entries=%u", + repl->name, AF_BRIDGE, repl->nentries); } #endif return ret; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e1648238a9c99..c83a3b5e1c6c2 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1192,16 +1192,10 @@ xt_replace_table(struct xt_table *table, #ifdef CONFIG_AUDIT if (audit_enabled) { - struct audit_buffer *ab; - - ab = audit_log_start(current->audit_context, GFP_KERNEL, - AUDIT_NETFILTER_CFG); - if (ab) { - audit_log_format(ab, "table=%s family=%u entries=%u", - table->name, table->af, - private->number); - audit_log_end(ab); - } + audit_log(current->audit_context, GFP_KERNEL, + AUDIT_NETFILTER_CFG, + "table=%s family=%u entries=%u", + table->name, table->af, private->number); } #endif From a18177008b2613f009ef210b7da695056a932321 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:15:27 +0200 Subject: [PATCH 27/47] netfilter: exthdr: factor out tcp option access Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 1ec49fe5845f1..921c95f2c583b 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -61,6 +61,26 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +static void * +nft_tcp_header_pointer(const struct nft_pktinfo *pkt, + unsigned int len, void *buffer, unsigned int *tcphdr_len) +{ + struct tcphdr *tcph; + + if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP) + return NULL; + + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer); + if (!tcph) + return NULL; + + *tcphdr_len = __tcp_hdrlen(tcph); + if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len) + return NULL; + + return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer); +} + static void nft_exthdr_tcp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -72,18 +92,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, struct tcphdr *tcph; u8 *opt; - if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP) - goto err; - - tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff); - if (!tcph) - goto err; - - tcphdr_len = __tcp_hdrlen(tcph); - if (tcphdr_len < sizeof(*tcph)) - goto err; - - tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff); + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; From 5e7d695a482c6e581addf42717469bd363dd734e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:15:28 +0200 Subject: [PATCH 28/47] netfilter: exthdr: split netlink dump function so eval and uncoming eval_set versions can reuse a common helper. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 921c95f2c583b..e3a6eebe7e0cc 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -180,12 +180,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, priv->len); } -static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) { - const struct nft_exthdr *priv = nft_expr_priv(expr); - - if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) - goto nla_put_failure; if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) @@ -202,6 +198,16 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) return -1; } +static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) + return -1; + + return nft_exthdr_dump_common(skb, priv); +} + static struct nft_expr_type nft_exthdr_type; static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, From 99d1712bc41c7c9a5a473c104a4ad15427757b22 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:15:29 +0200 Subject: [PATCH 29/47] netfilter: exthdr: tcp option set support This allows setting 2 and 4 byte quantities in the tcp option space. Main purpose is to allow native replacement for xt_TCPMSS to work around pmtu blackholes. Writes to kind and len are now allowed at the moment, it does not seem useful to do this as it causes corruption of the tcp option space. We can always lift this restriction later if a use-case appears. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 4 +- net/netfilter/nft_exthdr.c | 164 ++++++++++++++++++++++- 2 files changed, 165 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index be25cf69295b4..40fd199f7531a 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -732,7 +732,8 @@ enum nft_exthdr_op { * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32) * @NFTA_EXTHDR_LEN: extension header length (NLA_U32) * @NFTA_EXTHDR_FLAGS: extension header flags (NLA_U32) - * @NFTA_EXTHDR_OP: option match type (NLA_U8) + * @NFTA_EXTHDR_OP: option match type (NLA_U32) + * @NFTA_EXTHDR_SREG: option match type (NLA_U32) */ enum nft_exthdr_attributes { NFTA_EXTHDR_UNSPEC, @@ -742,6 +743,7 @@ enum nft_exthdr_attributes { NFTA_EXTHDR_LEN, NFTA_EXTHDR_FLAGS, NFTA_EXTHDR_OP, + NFTA_EXTHDR_SREG, __NFTA_EXTHDR_MAX }; #define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index e3a6eebe7e0cc..f5a0bf5e3bdd2 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include #include #include #include @@ -23,6 +24,7 @@ struct nft_exthdr { u8 len; u8 op; enum nft_registers dreg:8; + enum nft_registers sreg:8; u8 flags; }; @@ -124,6 +126,88 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; + struct nft_exthdr *priv = nft_expr_priv(expr); + unsigned int i, optl, tcphdr_len, offset; + struct tcphdr *tcph; + u8 *opt; + u32 src; + + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); + if (!tcph) + return; + + opt = (u8 *)tcph; + for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { + union { + u8 octet; + __be16 v16; + __be32 v32; + } old, new; + + optl = optlen(opt, i); + + if (priv->type != opt[i]) + continue; + + if (i + optl > tcphdr_len || priv->len + priv->offset > optl) + return; + + if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len)) + return; + + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, + &tcphdr_len); + if (!tcph) + return; + + src = regs->data[priv->sreg]; + offset = i + priv->offset; + + switch (priv->len) { + case 2: + old.v16 = get_unaligned((u16 *)(opt + offset)); + new.v16 = src; + + switch (priv->type) { + case TCPOPT_MSS: + /* increase can cause connection to stall */ + if (ntohs(old.v16) <= ntohs(new.v16)) + return; + break; + } + + if (old.v16 == new.v16) + return; + + put_unaligned(new.v16, (u16*)(opt + offset)); + inet_proto_csum_replace2(&tcph->check, pkt->skb, + old.v16, new.v16, false); + break; + case 4: + new.v32 = src; + old.v32 = get_unaligned((u32 *)(opt + offset)); + + if (old.v32 == new.v32) + return; + + put_unaligned(new.v32, (u32*)(opt + offset)); + inet_proto_csum_replace4(&tcph->check, pkt->skb, + old.v32, new.v32, false); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return; + } +} + static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, @@ -180,6 +264,55 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, priv->len); } +static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; + int err; + + if (!tb[NFTA_EXTHDR_SREG] || + !tb[NFTA_EXTHDR_TYPE] || + !tb[NFTA_EXTHDR_OFFSET] || + !tb[NFTA_EXTHDR_LEN]) + return -EINVAL; + + if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS]) + return -EINVAL; + + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); + if (err < 0) + return err; + + err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); + if (err < 0) + return err; + + if (offset < 2) + return -EOPNOTSUPP; + + switch (len) { + case 2: break; + case 4: break; + default: + return -EOPNOTSUPP; + } + + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); + if (err < 0) + return err; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->offset = offset; + priv->len = len; + priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]); + priv->flags = flags; + priv->op = op; + + return nft_validate_register_load(priv->sreg, priv->len); +} + static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) { if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) @@ -208,6 +341,16 @@ static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) return nft_exthdr_dump_common(skb, priv); } +static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg)) + return -1; + + return nft_exthdr_dump_common(skb, priv); +} + static struct nft_expr_type nft_exthdr_type; static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, @@ -225,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = { .dump = nft_exthdr_dump, }; +static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_tcp_set_eval, + .init = nft_exthdr_tcp_set_init, + .dump = nft_exthdr_dump_set, +}; + static const struct nft_expr_ops * nft_exthdr_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -234,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, if (!tb[NFTA_EXTHDR_OP]) return &nft_exthdr_ipv6_ops; + if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG]) + return ERR_PTR(-EOPNOTSUPP); + op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP])); switch (op) { case NFT_EXTHDR_OP_TCPOPT: - return &nft_exthdr_tcp_ops; + if (tb[NFTA_EXTHDR_SREG]) + return &nft_exthdr_tcp_set_ops; + if (tb[NFTA_EXTHDR_DREG]) + return &nft_exthdr_tcp_ops; + break; case NFT_EXTHDR_OP_IPV6: - return &nft_exthdr_ipv6_ops; + if (tb[NFTA_EXTHDR_DREG]) + return &nft_exthdr_ipv6_ops; + break; } return ERR_PTR(-EOPNOTSUPP); From 6b5dc98e8fac041a3decfc3186e08c1c570ea691 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 8 Aug 2017 15:48:04 +0200 Subject: [PATCH 30/47] netfilter: rt: add support to fetch path mss to be used in combination with tcp option set support to mimic iptables TCPMSS --clamp-mss-to-pmtu. v2: Eric Dumazet points out dst must be initialized. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 + net/netfilter/nft_rt.c | 66 ++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 40fd199f7531a..b49da72efa68c 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -811,11 +811,13 @@ enum nft_meta_keys { * @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid) * @NFT_RT_NEXTHOP4: routing nexthop for IPv4 * @NFT_RT_NEXTHOP6: routing nexthop for IPv6 + * @NFT_RT_TCPMSS: fetch current path tcp mss */ enum nft_rt_keys { NFT_RT_CLASSID, NFT_RT_NEXTHOP4, NFT_RT_NEXTHOP6, + NFT_RT_TCPMSS, }; /** diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index c7383d8f88d0b..e142e65d31761 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -23,6 +23,42 @@ struct nft_rt { enum nft_registers dreg:8; }; +static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst) +{ + u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst); + const struct sk_buff *skb = pkt->skb; + const struct nf_afinfo *ai; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + fl.u.ip4.daddr = ip_hdr(skb)->saddr; + minlen = sizeof(struct iphdr); + break; + case NFPROTO_IPV6: + fl.u.ip6.daddr = ipv6_hdr(skb)->saddr; + break; + } + + ai = nf_get_afinfo(nft_pf(pkt)); + if (ai) { + struct dst_entry *dst = NULL; + + ai->route(nft_net(pkt), &dst, &fl, false); + if (dst) { + mtu = min(mtu, dst_mtu(dst)); + dst_release(dst); + } + } + + if (mtu <= minlen || mtu > 0xffff) + return TCP_MSS_DEFAULT; + + return mtu - minlen; +} + static void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -57,6 +93,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr, &ipv6_hdr(skb)->daddr), sizeof(struct in6_addr)); break; + case NFT_RT_TCPMSS: + nft_reg_store16(dest, get_tcpmss(pkt, dst)); + break; default: WARN_ON(1); goto err; @@ -94,6 +133,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx, case NFT_RT_NEXTHOP6: len = sizeof(struct in6_addr); break; + case NFT_RT_TCPMSS: + len = sizeof(u16); + break; default: return -EOPNOTSUPP; } @@ -118,6 +160,29 @@ static int nft_rt_get_dump(struct sk_buff *skb, return -1; } +static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->key) { + case NFT_RT_NEXTHOP4: + case NFT_RT_NEXTHOP6: + case NFT_RT_CLASSID: + return 0; + case NFT_RT_TCPMSS: + hooks = (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING); + break; + default: + return -EINVAL; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + static struct nft_expr_type nft_rt_type; static const struct nft_expr_ops nft_rt_get_ops = { .type = &nft_rt_type, @@ -125,6 +190,7 @@ static const struct nft_expr_ops nft_rt_get_ops = { .eval = nft_rt_get_eval, .init = nft_rt_get_init, .dump = nft_rt_get_dump, + .validate = nft_rt_validate, }; static struct nft_expr_type nft_rt_type __read_mostly = { From eee6ebbac18a189ef33d25ea9b8bcae176515e49 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 11 Aug 2017 11:16:07 -0700 Subject: [PATCH 31/47] netfilter: nf_nat_h323: fix logical-not-parentheses warning Clang produces the following warning: net/ipv4/netfilter/nf_nat_h323.c:553:6: error: logical not is only applied to the left hand side of this comparison [-Werror,-Wlogical-not-parentheses] if (!set_h225_addr(skb, protoff, data, dataoff, taddr, ^ add parentheses after the '!' to evaluate the comparison first add parentheses around left hand side expression to silence this warning There's not necessarily a bug here, but it's cleaner to return early, ex: if (x) return ... rather than: if (x == 0) ... else return Also added a return code check that seemed to be missing in one instance. Signed-off-by: Nick Desaulniers Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_h323.c | 57 +++++++++++++++++--------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 574f7ebba0b62..ac8342dcb55eb 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -252,16 +252,16 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, if (set_h245_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? nated_port + 1 : - nated_port)) == 0) { - /* Save ports */ - info->rtp_port[i][dir] = rtp_port; - info->rtp_port[i][!dir] = htons(nated_port); - } else { + nated_port))) { nf_ct_unexpect_related(rtp_exp); nf_ct_unexpect_related(rtcp_exp); return -1; } + /* Save ports */ + info->rtp_port[i][dir] = rtp_port; + info->rtp_port[i][!dir] = htons(nated_port); + /* Success */ pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", &rtp_exp->tuple.src.u3.ip, @@ -370,15 +370,15 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, /* Modify signal */ if (set_h225_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, - htons(nated_port)) == 0) { - /* Save ports */ - info->sig_port[dir] = port; - info->sig_port[!dir] = htons(nated_port); - } else { + htons(nated_port))) { nf_ct_unexpect_related(exp); return -1; } + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), @@ -462,24 +462,27 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, /* Modify signal */ if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, - htons(nated_port)) == 0) { - /* Save ports */ - info->sig_port[dir] = port; - info->sig_port[!dir] = htons(nated_port); - - /* Fix for Gnomemeeting */ - if (idx > 0 && - get_h225_addr(ct, *data, &taddr[0], &addr, &port) && - (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(skb, protoff, data, 0, &taddr[0], - &ct->tuplehash[!dir].tuple.dst.u3, - info->sig_port[!dir]); - } - } else { + htons(nated_port))) { nf_ct_unexpect_related(exp); return -1; } + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + + /* Fix for Gnomemeeting */ + if (idx > 0 && + get_h225_addr(ct, *data, &taddr[0], &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { + if (set_h225_addr(skb, protoff, data, 0, &taddr[0], + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir])) { + nf_ct_unexpect_related(exp); + return -1; + } + } + /* Success */ pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", &exp->tuple.src.u3.ip, @@ -550,9 +553,9 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(skb, protoff, data, dataoff, taddr, - &ct->tuplehash[!dir].tuple.dst.u3, - htons(nated_port)) == 0) { + if (set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port))) { nf_ct_unexpect_related(exp); return -1; } From 0d03510038bda70b5a4a252e8216822e6ce0cbdb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:02 +0200 Subject: [PATCH 32/47] netfilter: conntrack: compute l3proto nla size at compile time avoids a pointer and allows struct to be const later on. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 19 ++++++++----------- .../netfilter/nf_conntrack_l3proto_ipv4.c | 13 +++++++------ .../netfilter/nf_conntrack_l3proto_ipv6.c | 14 ++++++++------ net/netfilter/nf_conntrack_netlink.c | 3 ++- net/netfilter/nf_conntrack_proto.c | 9 +++------ 5 files changed, 28 insertions(+), 30 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 1b8de164d7446..6a27ffea7480b 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -20,6 +20,9 @@ struct nf_conntrack_l3proto { /* L3 Protocol Family number. ex) PF_INET */ u_int16_t l3proto; + /* size of tuple nlattr, fills a hole */ + u16 nla_size; + /* Protocol name */ const char *name; @@ -49,23 +52,17 @@ struct nf_conntrack_l3proto { int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) int (*tuple_to_nlattr)(struct sk_buff *skb, const struct nf_conntrack_tuple *t); - - /* Called when netns wants to use connection tracking */ - int (*net_ns_get)(struct net *); - void (*net_ns_put)(struct net *); - - /* - * Calculate size of tuple nlattr - */ - int (*nlattr_tuple_size)(void); - int (*nlattr_to_tuple)(struct nlattr *tb[], struct nf_conntrack_tuple *t); const struct nla_policy *nla_policy; +#endif - size_t nla_size; + /* Called when netns wants to use connection tracking */ + int (*net_ns_get)(struct net *); + void (*net_ns_put)(struct net *); /* Module (if any) which this is connected to. */ struct module *me; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index de5f0e6ddd1b1..9fb8cb033d804 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -303,11 +303,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[], return 0; } - -static int ipv4_nlattr_tuple_size(void) -{ - return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1); -} #endif static struct nf_sockopt_ops so_getorigdst = { @@ -365,9 +360,10 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .get_l4proto = ipv4_get_l4proto, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv4_tuple_to_nlattr, - .nlattr_tuple_size = ipv4_nlattr_tuple_size, .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, + .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */ + NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */ #endif .net_ns_get = ipv4_hooks_register, .net_ns_put = ipv4_hooks_unregister, @@ -421,6 +417,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) need_conntrack(); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) != + nf_conntrack_l3proto_ipv4.nla_size)) + return -EINVAL; +#endif ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { pr_err("Unable to register netfilter socket option\n"); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index ddef5ee9e0a86..6b4d59fd02147 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -308,11 +308,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[], return 0; } - -static int ipv6_nlattr_tuple_size(void) -{ - return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1); -} #endif static int ipv6_hooks_register(struct net *net) @@ -360,9 +355,10 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .get_l4proto = ipv6_get_l4proto, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv6_tuple_to_nlattr, - .nlattr_tuple_size = ipv6_nlattr_tuple_size, .nlattr_to_tuple = ipv6_nlattr_to_tuple, .nla_policy = ipv6_nla_policy, + .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) + + NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])), #endif .net_ns_get = ipv6_hooks_register, .net_ns_put = ipv6_hooks_unregister, @@ -421,6 +417,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) need_conntrack(); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) != + nf_conntrack_l3proto_ipv6.nla_size)) + return -EINVAL; +#endif + ret = nf_register_sockopt(&so_getorigdst6); if (ret < 0) { pr_err("Unable to register netfilter socket option\n"); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f4ca48817f669..b59a453a0fd82 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -540,7 +540,8 @@ static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) size_t len = 0; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); - len += l3proto->nla_size; + len = l3proto->nla_size; + len *= 3u; /* ORIG, REPLY, MASTER */ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); len += l4proto->nla_size; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 27810cf816a6b..85104a27cc898 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -214,10 +214,10 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) if (proto->l3proto >= NFPROTO_NUMPROTO) return -EBUSY; - - if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + if (proto->tuple_to_nlattr && proto->nla_size == 0) return -EINVAL; - +#endif mutex_lock(&nf_ct_proto_mutex); old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], lockdep_is_held(&nf_ct_proto_mutex)); @@ -226,9 +226,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) goto out_unlock; } - if (proto->nlattr_tuple_size) - proto->nla_size = 3 * proto->nlattr_tuple_size(); - rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); out_unlock: From a3134d537f8209f5b149d7ed9f287047158845f0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:03 +0200 Subject: [PATCH 33/47] netfilter: conntrack: remove protocol name from l3proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 3 --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 1 - net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 1 - net/netfilter/nf_conntrack_l3proto_generic.c | 1 - net/netfilter/nf_conntrack_standalone.c | 12 +++++++++++- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 6a27ffea7480b..e31861e4fa6aa 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -23,9 +23,6 @@ struct nf_conntrack_l3proto { /* size of tuple nlattr, fills a hole */ u16 nla_size; - /* Protocol name */ - const char *name; - /* * Try to fill in the third arg: nhoff is offset of l3 proto * hdr. Return true if possible. diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 9fb8cb033d804..9f7ea862068ca 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -353,7 +353,6 @@ static void ipv4_hooks_unregister(struct net *net) struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, - .name = "ipv4", .pkt_to_tuple = ipv4_pkt_to_tuple, .invert_tuple = ipv4_invert_tuple, .print_tuple = ipv4_print_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 6b4d59fd02147..91d37fbe28de5 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -348,7 +348,6 @@ static void ipv6_hooks_unregister(struct net *net) struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, - .name = "ipv6", .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, .print_tuple = ipv6_print_tuple, diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index cf9ace70becec..0387971582bc4 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -64,7 +64,6 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { .l3proto = PF_UNSPEC, - .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, .print_tuple = generic_print_tuple, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5b6c675d55b14..359d7e6a4503d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -198,6 +198,16 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) } #endif +static const char* l3proto_name(u16 proto) +{ + switch (proto) { + case AF_INET: return "ipv4"; + case AF_INET6: return "ipv6"; + } + + return "unknown"; +} + /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { @@ -231,7 +241,7 @@ static int ct_seq_show(struct seq_file *s, void *v) ret = -ENOSPC; seq_printf(s, "%-8s %u %-8s %u %ld ", - l3proto->name, nf_ct_l3num(ct), + l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), l4proto->name, nf_ct_protonum(ct), nf_ct_expires(ct) / HZ); From 09ec82f5af99d1e35614eb0844b920fc335a313d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:04 +0200 Subject: [PATCH 34/47] netfilter: conntrack: remove protocol name from l4proto struct no need to waste storage for something that is only needed in one place and can be deduced from protocol number. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 3 --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 1 - net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 1 - net/netfilter/nf_conntrack_proto.c | 8 ++++---- net/netfilter/nf_conntrack_proto_dccp.c | 2 -- net/netfilter/nf_conntrack_proto_generic.c | 1 - net/netfilter/nf_conntrack_proto_gre.c | 1 - net/netfilter/nf_conntrack_proto_sctp.c | 2 -- net/netfilter/nf_conntrack_proto_tcp.c | 2 -- net/netfilter/nf_conntrack_proto_udp.c | 4 ---- net/netfilter/nf_conntrack_standalone.c | 17 ++++++++++++++++- 11 files changed, 20 insertions(+), 22 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index b6e27cafb1d93..47c16bae5e003 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -108,9 +108,6 @@ struct nf_conntrack_l4proto { /* Return the per-net protocol part. */ struct nf_proto_net *(*get_net_proto)(struct net *net); - /* Protocol name */ - const char *name; - /* Module (if any) which this is connected to. */ struct module *me; }; diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 73c591d8a9a8e..fdbeb03e46009 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -362,7 +362,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, - .name = "icmp", .pkt_to_tuple = icmp_pkt_to_tuple, .invert_tuple = icmp_invert_tuple, .print_tuple = icmp_print_tuple, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d5f028e33f658..805ab122767ad 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -367,7 +367,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, - .name = "icmpv6", .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, .print_tuple = icmpv6_print_tuple, diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 85104a27cc898..0ecab7163d625 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -437,8 +437,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], } if (i != num_proto) { ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4; - pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n", - ver, l4proto[i]->name, ver); + pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n", + ver, l4proto[i]->l4proto); nf_ct_l4proto_unregister(l4proto, i); } return ret; @@ -458,8 +458,8 @@ int nf_ct_l4proto_pernet_register(struct net *net, break; } if (i != num_proto) { - pr_err("nf_conntrack_%s%d: pernet registration failed\n", - l4proto[i]->name, + pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n", + l4proto[i]->l4proto, l4proto[i]->l3proto == PF_INET6 ? 6 : 4); nf_ct_l4proto_pernet_unregister(net, l4proto, i); } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 4707d997558af..a0492184a0a87 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -880,7 +880,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net) struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_DCCP, - .name = "dccp", .pkt_to_tuple = dccp_pkt_to_tuple, .invert_tuple = dccp_invert_tuple, .new = dccp_new, @@ -916,7 +915,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .l3proto = AF_INET6, .l4proto = IPPROTO_DCCP, - .name = "dccp", .pkt_to_tuple = dccp_pkt_to_tuple, .invert_tuple = dccp_invert_tuple, .new = dccp_new, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d5868bad33a7e..4fe8b3312823a 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -187,7 +187,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .l4proto = 255, - .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, .print_tuple = generic_print_tuple, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 87bb40a3feb58..984bcfdbd4d78 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -364,7 +364,6 @@ static int gre_init_net(struct net *net, u_int16_t proto) static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_GRE, - .name = "gre", .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6eef29d2eec40..1d7a995ea049c 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -791,7 +791,6 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net) struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, - .name = "sctp", .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, .print_tuple = sctp_print_tuple, @@ -828,7 +827,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, - .name = "sctp", .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, .print_tuple = sctp_print_tuple, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9758a7dfd83ef..e3e59e3d0592c 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1556,7 +1556,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, - .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, .print_tuple = tcp_print_tuple, @@ -1594,7 +1593,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, - .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, .print_tuple = tcp_print_tuple, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index f6ebce6178ca6..ec861a1169f10 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -313,7 +313,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDP, - .name = "udp", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, @@ -347,7 +346,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, - .name = "udplite", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, @@ -381,7 +379,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, - .name = "udp", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, @@ -415,7 +412,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, - .name = "udplite", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 359d7e6a4503d..b28f9e93f574e 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -208,6 +208,21 @@ static const char* l3proto_name(u16 proto) return "unknown"; } +static const char* l4proto_name(u16 proto) +{ + switch (proto) { + case IPPROTO_ICMP: return "icmp"; + case IPPROTO_TCP: return "tcp"; + case IPPROTO_UDP: return "udp"; + case IPPROTO_DCCP: return "dccp"; + case IPPROTO_GRE: return "gre"; + case IPPROTO_SCTP: return "sctp"; + case IPPROTO_UDPLITE: return "udplite"; + } + + return "unknown"; +} + /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { @@ -242,7 +257,7 @@ static int ct_seq_show(struct seq_file *s, void *v) ret = -ENOSPC; seq_printf(s, "%-8s %u %-8s %u %ld ", l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), - l4proto->name, nf_ct_protonum(ct), + l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) From 036c69400a34cf8f6d39c88325dd510462809e7b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:05 +0200 Subject: [PATCH 35/47] netfilter: conntrack: reduce size of l4protocol trackers can use u16 for both, shrinks size by another 8 bytes. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 47c16bae5e003..15c58dd3f7016 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -92,12 +92,12 @@ struct nf_conntrack_l4proto { #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) struct { - size_t obj_size; int (*nlattr_to_obj)(struct nlattr *tb[], struct net *net, void *data); int (*obj_to_nlattr)(struct sk_buff *skb, const void *data); - unsigned int nlattr_max; + u16 obj_size; + u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; #endif From 91950833dd5a34ac6336aa88da6d43aaeb56ac6d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:06 +0200 Subject: [PATCH 36/47] netfilter: conntrack: place print_tuple in procfs part CONFIG_NF_CONNTRACK_PROCFS is deprecated, no need to use a function pointer in the trackers for this. Place the printf formatting in the one place that uses it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 4 -- include/net/netfilter/nf_conntrack_l4proto.h | 4 -- .../netfilter/nf_conntrack_l3proto_ipv4.c | 8 --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 11 ---- .../netfilter/nf_conntrack_l3proto_ipv6.c | 8 --- .../netfilter/nf_conntrack_proto_icmpv6.c | 11 ---- net/netfilter/nf_conntrack_l3proto_generic.c | 6 -- net/netfilter/nf_conntrack_proto_dccp.c | 10 ---- net/netfilter/nf_conntrack_proto_generic.c | 7 --- net/netfilter/nf_conntrack_proto_gre.c | 10 ---- net/netfilter/nf_conntrack_proto_sctp.c | 11 ---- net/netfilter/nf_conntrack_proto_tcp.c | 11 ---- net/netfilter/nf_conntrack_proto_udp.c | 13 ----- net/netfilter/nf_conntrack_standalone.c | 58 ++++++++++++++++++- 14 files changed, 56 insertions(+), 116 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index e31861e4fa6aa..dabb53b0913c3 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -37,10 +37,6 @@ struct nf_conntrack_l3proto { bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_tuple *orig); - /* Print out the per-protocol part of the tuple. */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* * Called before tracking. * *dataoff: offset of protocol header (TCP, UDP,...) in skb diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 15c58dd3f7016..7e8da04a5eb6c 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,10 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the per-protocol part of the tuple. Return like seq_* */ - void (*print_tuple)(struct seq_file *s, - const struct nf_conntrack_tuple *); - /* Print out the private part of the conntrack. */ void (*print_conntrack)(struct seq_file *s, struct nf_conn *); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 9f7ea862068ca..fe374da4bc13e 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static void ipv4_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "src=%pI4 dst=%pI4 ", - &tuple->src.u3.ip, &tuple->dst.u3.ip); -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -355,7 +348,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, .pkt_to_tuple = ipv4_pkt_to_tuple, .invert_tuple = ipv4_invert_tuple, - .print_tuple = ipv4_print_tuple, .get_l4proto = ipv4_get_l4proto, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv4_tuple_to_nlattr, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index fdbeb03e46009..434b4e20f6db5 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void icmp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "type=%u code=%u id=%u ", - tuple->dst.u.icmp.type, - tuple->dst.u.icmp.code, - ntohs(tuple->src.u.icmp.id)); -} - static unsigned int *icmp_get_timeouts(struct net *net) { return &icmp_pernet(net)->timeout; @@ -364,7 +354,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .l4proto = IPPROTO_ICMP, .pkt_to_tuple = icmp_pkt_to_tuple, .invert_tuple = icmp_invert_tuple, - .print_tuple = icmp_print_tuple, .packet = icmp_packet, .get_timeouts = icmp_get_timeouts, .new = icmp_new, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 91d37fbe28de5..fe01dc953c56a 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -67,13 +67,6 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static void ipv6_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "src=%pI6 dst=%pI6 ", - tuple->src.u3.ip6, tuple->dst.u3.ip6); -} - static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -350,7 +343,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .l3proto = PF_INET6, .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, - .print_tuple = ipv6_print_tuple, .get_l4proto = ipv6_get_l4proto, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv6_tuple_to_nlattr, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 805ab122767ad..808f63e2e1ffe 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -84,16 +84,6 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void icmpv6_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "type=%u code=%u id=%u ", - tuple->dst.u.icmp.type, - tuple->dst.u.icmp.code, - ntohs(tuple->src.u.icmp.id)); -} - static unsigned int *icmpv6_get_timeouts(struct net *net) { return &icmpv6_pernet(net)->timeout; @@ -369,7 +359,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .l4proto = IPPROTO_ICMPV6, .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, - .print_tuple = icmpv6_print_tuple, .packet = icmpv6_packet, .get_timeouts = icmpv6_get_timeouts, .new = icmpv6_new, diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index 0387971582bc4..397e6911214f8 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static void generic_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ -} - static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -66,7 +61,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, - .print_tuple = generic_print_tuple, .get_l4proto = generic_get_l4proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index a0492184a0a87..d2df49ac390a3 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -623,14 +623,6 @@ static bool dccp_can_early_drop(const struct nf_conn *ct) return false; } -static void dccp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.dccp.port), - ntohs(tuple->dst.u.dccp.port)); -} - static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); @@ -887,7 +879,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .get_timeouts = dccp_get_timeouts, .error = dccp_error, .can_early_drop = dccp_can_early_drop, - .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, @@ -922,7 +913,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .get_timeouts = dccp_get_timeouts, .error = dccp_error, .can_early_drop = dccp_can_early_drop, - .print_tuple = dccp_print_tuple, .print_conntrack = dccp_print_conntrack, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 4fe8b3312823a..2bc3d0c1a5bf4 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -62,12 +62,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void generic_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ -} - static unsigned int *generic_get_timeouts(struct net *net) { return &(generic_pernet(net)->timeout); @@ -189,7 +183,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = .l4proto = 255, .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, - .print_tuple = generic_print_tuple, .packet = generic_packet, .get_timeouts = generic_get_timeouts, .new = generic_new, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 984bcfdbd4d78..cd28095dd7a42 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -224,15 +224,6 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, return true; } -/* print gre part of tuple */ -static void gre_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "srckey=0x%x dstkey=0x%x ", - ntohs(tuple->src.u.gre.key), - ntohs(tuple->dst.u.gre.key)); -} - /* print private data for conntrack */ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { @@ -366,7 +357,6 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l4proto = IPPROTO_GRE, .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, - .print_tuple = gre_print_tuple, .print_conntrack = gre_print_conntrack, .get_timeouts = gre_get_timeouts, .packet = gre_packet, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 1d7a995ea049c..da83b401be17b 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -174,15 +174,6 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void sctp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.sctp.port), - ntohs(tuple->dst.u.sctp.port)); -} - /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { @@ -793,7 +784,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, - .print_tuple = sctp_print_tuple, .print_conntrack = sctp_print_conntrack, .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, @@ -829,7 +819,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, - .print_tuple = sctp_print_tuple, .print_conntrack = sctp_print_conntrack, .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e3e59e3d0592c..c868b36b8945f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -301,15 +301,6 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void tcp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.tcp.port), - ntohs(tuple->dst.u.tcp.port)); -} - /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { @@ -1558,7 +1549,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .l4proto = IPPROTO_TCP, .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, - .print_tuple = tcp_print_tuple, .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, .get_timeouts = tcp_get_timeouts, @@ -1595,7 +1585,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .l4proto = IPPROTO_TCP, .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, - .print_tuple = tcp_print_tuple, .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, .get_timeouts = tcp_get_timeouts, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index ec861a1169f10..dcf3030d22263 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void udp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); -} - static unsigned int *udp_get_timeouts(struct net *net) { return udp_pernet(net)->timeouts; @@ -316,7 +307,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, @@ -349,7 +339,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, @@ -382,7 +371,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, @@ -415,7 +403,6 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index b28f9e93f574e..9eb85858d764a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) { - l3proto->print_tuple(s, tuple); - l4proto->print_tuple(s, tuple); + switch (l3proto->l3proto) { + case NFPROTO_IPV4: + seq_printf(s, "src=%pI4 dst=%pI4 ", + &tuple->src.u3.ip, &tuple->dst.u3.ip); + break; + case NFPROTO_IPV6: + seq_printf(s, "src=%pI6 dst=%pI6 ", + tuple->src.u3.ip6, tuple->dst.u3.ip6); + break; + default: + break; + } + + switch (l4proto->l4proto) { + case IPPROTO_ICMP: + seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); + break; + case IPPROTO_TCP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); + break; + case IPPROTO_UDPLITE: /* fallthrough */ + case IPPROTO_UDP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); + + break; + case IPPROTO_DCCP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.dccp.port), + ntohs(tuple->dst.u.dccp.port)); + break; + case IPPROTO_SCTP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.sctp.port), + ntohs(tuple->dst.u.sctp.port)); + break; + case IPPROTO_ICMPV6: + seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); + break; + case IPPROTO_GRE: + seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); + break; + default: + break; + } } EXPORT_SYMBOL_GPL(print_tuple); From ea48cc83cf612fddb4e8868369348b9f936cfedb Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:07 +0200 Subject: [PATCH 37/47] netfilter: conntrack: print_conntrack only needed if CONFIG_NF_CONNTRACK_PROCFS Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l4proto.h | 7 ++++--- net/netfilter/nf_conntrack_proto_dccp.c | 6 ++++++ net/netfilter/nf_conntrack_proto_gre.c | 4 ++++ net/netfilter/nf_conntrack_proto_sctp.c | 6 ++++++ net/netfilter/nf_conntrack_proto_tcp.c | 6 ++++++ 5 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7e8da04a5eb6c..4976ef92dc786 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -61,9 +61,6 @@ struct nf_conntrack_l4proto { /* called by gc worker if table is full */ bool (*can_early_drop)(const struct nf_conn *ct); - /* Print out the private part of the conntrack. */ - void (*print_conntrack)(struct seq_file *s, struct nf_conn *); - /* Return the array of timeouts for this protocol. */ unsigned int *(*get_timeouts)(struct net *net); @@ -96,6 +93,10 @@ struct nf_conntrack_l4proto { u16 nlattr_max; const struct nla_policy *nla_policy; } ctnl_timeout; +#endif +#ifdef CONFIG_NF_CONNTRACK_PROCFS + /* Print out the private part of the conntrack. */ + void (*print_conntrack)(struct seq_file *s, struct nf_conn *); #endif unsigned int *net_id; /* Init l4proto pernet data */ diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index d2df49ac390a3..188347571fc78 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -623,10 +623,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct) return false; } +#ifdef CONFIG_NF_CONNTRACK_PROCFS static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } +#endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, @@ -879,7 +881,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .get_timeouts = dccp_get_timeouts, .error = dccp_error, .can_early_drop = dccp_can_early_drop, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = dccp_print_conntrack, +#endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, .nlattr_size = dccp_nlattr_size, @@ -913,7 +917,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .get_timeouts = dccp_get_timeouts, .error = dccp_error, .can_early_drop = dccp_can_early_drop, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = dccp_print_conntrack, +#endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, .nlattr_size = dccp_nlattr_size, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index cd28095dd7a42..c0e3a23ac23a4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -224,6 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, return true; } +#ifdef CONFIG_NF_CONNTRACK_PROCFS /* print private data for conntrack */ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { @@ -231,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) (ct->proto.gre.timeout / HZ), (ct->proto.gre.stream_timeout / HZ)); } +#endif static unsigned int *gre_get_timeouts(struct net *net) { @@ -357,7 +359,9 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l4proto = IPPROTO_GRE, .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, +#endif .get_timeouts = gre_get_timeouts, .packet = gre_packet, .new = gre_new, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index da83b401be17b..890b5c73368db 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -174,11 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } +#ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); } +#endif #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ @@ -784,7 +786,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = sctp_print_conntrack, +#endif .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, @@ -819,7 +823,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l4proto = IPPROTO_SCTP, .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = sctp_print_conntrack, +#endif .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index c868b36b8945f..33c52d9ab2f52 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -301,11 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } +#ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } +#endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) { @@ -1549,7 +1551,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .l4proto = IPPROTO_TCP, .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = tcp_print_conntrack, +#endif .packet = tcp_packet, .get_timeouts = tcp_get_timeouts, .new = tcp_new, @@ -1585,7 +1589,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .l4proto = IPPROTO_TCP, .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = tcp_print_conntrack, +#endif .packet = tcp_packet, .get_timeouts = tcp_get_timeouts, .new = tcp_new, From b3480fe059ac9121b5714205b4ddae14b59ef4be Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 12 Aug 2017 00:57:08 +0200 Subject: [PATCH 38/47] netfilter: conntrack: make protocol tracker pointers const Doesn't change generated code, but will make it easier to eventually make the actual trackers themselvers const. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_l3proto.h | 6 +++--- include/net/netfilter/nf_conntrack_l4proto.h | 4 ++-- include/net/netfilter/nf_conntrack_timeout.h | 2 +- net/netfilter/nf_conntrack_core.c | 12 +++++------ net/netfilter/nf_conntrack_netlink.c | 22 ++++++++++---------- net/netfilter/nf_conntrack_proto.c | 20 +++++++++--------- net/netfilter/nfnetlink_cttimeout.c | 14 ++++++------- net/netfilter/xt_CT.c | 2 +- net/openvswitch/conntrack.c | 4 ++-- 9 files changed, 43 insertions(+), 43 deletions(-) diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index dabb53b0913c3..6269deecbee77 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -64,10 +64,10 @@ struct nf_conntrack_l3proto { extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; /* Protocol global registration. */ -int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto); -void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto); +int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto); +void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto); -struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); +const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); /* Existing built-in protocols */ extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 4976ef92dc786..d4933d56809d9 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -114,10 +114,10 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_generic; #define MAX_NF_CT_PROTO 256 -struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, +const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto); -struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, +const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto); void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p); diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h index b222957062b56..483d104fa2541 100644 --- a/include/net/netfilter/nf_conntrack_timeout.h +++ b/include/net/netfilter/nf_conntrack_timeout.h @@ -16,7 +16,7 @@ struct ctnl_timeout { refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; __u16 l3num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; char data[0]; }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f2f00eaf217da..c23df7c9cd598 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -250,8 +250,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; unsigned int protoff; u_int8_t protonum; int ret; @@ -400,7 +400,7 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); @@ -694,7 +694,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && @@ -1344,10 +1344,10 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; struct nf_conn *ct, *tmpl; enum ip_conntrack_info ctinfo; - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b59a453a0fd82..de4053d84364b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -109,9 +109,9 @@ static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, static int ctnetlink_dump_tuples(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; int ret; - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; rcu_read_lock(); l3proto = __nf_ct_l3proto_find(tuple->src.l3num); @@ -163,7 +163,7 @@ static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) { - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; int ret; @@ -535,9 +535,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) { - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; - size_t len = 0; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; + size_t len; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); len = l3proto->nla_size; @@ -936,8 +936,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { static int ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nf_conntrack_tuple *tuple) { + const struct nf_conntrack_l4proto *l4proto; struct nlattr *tb[CTA_PROTO_MAX+1]; - struct nf_conntrack_l4proto *l4proto; int ret = 0; ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy, @@ -1580,8 +1580,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { const struct nlattr *attr = cda[CTA_PROTOINFO]; + const struct nf_conntrack_l4proto *l4proto; struct nlattr *tb[CTA_PROTOINFO_MAX+1]; - struct nf_conntrack_l4proto *l4proto; int err = 0; err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy, @@ -2475,11 +2475,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple_mask *mask) { - int ret; - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple m; struct nlattr *nest_parms; + int ret; memset(&m, 0xFF, sizeof(m)); memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 0ecab7163d625..b3e489c859ec4 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -65,7 +65,7 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, } #endif -struct nf_conntrack_l4proto * +const struct nf_conntrack_l4proto * __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto) { if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL)) @@ -77,7 +77,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); /* this is guaranteed to always return a valid protocol helper, since * it falls back to generic_protocol */ -struct nf_conntrack_l3proto * +const struct nf_conntrack_l3proto * nf_ct_l3proto_find_get(u_int16_t l3proto) { struct nf_conntrack_l3proto *p; @@ -95,8 +95,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); int nf_ct_l3proto_try_module_get(unsigned short l3proto) { + const struct nf_conntrack_l3proto *p; int ret; - struct nf_conntrack_l3proto *p; retry: p = nf_ct_l3proto_find_get(l3proto); if (p == &nf_conntrack_l3proto_generic) { @@ -173,10 +173,10 @@ void nf_ct_netns_put(struct net *net, u8 nfproto) } EXPORT_SYMBOL_GPL(nf_ct_netns_put); -struct nf_conntrack_l4proto * +const struct nf_conntrack_l4proto * nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) { - struct nf_conntrack_l4proto *p; + const struct nf_conntrack_l4proto *p; rcu_read_lock(); p = __nf_ct_l4proto_find(l3num, l4num); @@ -196,18 +196,18 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); static int kill_l3proto(struct nf_conn *i, void *data) { - return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto; + return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto; } static int kill_l4proto(struct nf_conn *i, void *data) { - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; l4proto = data; return nf_ct_protonum(i) == l4proto->l4proto && nf_ct_l3num(i) == l4proto->l3proto; } -int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) +int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; @@ -235,7 +235,7 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); -void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) +void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto) { BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); @@ -249,7 +249,7 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_destroy(kill_l3proto, proto); + nf_ct_iterate_destroy(kill_l3proto, (void*)proto); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index fcabccc99f0df..32b1c0b44e791 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -75,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, { __u16 l3num; __u8 l4num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; char *name; int ret; @@ -159,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - struct nf_conntrack_l4proto *l4proto = timeout->l4proto; + const struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); @@ -364,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { + const struct nf_conntrack_l4proto *l4proto; + unsigned int *timeouts; __u16 l3num; __u8 l4num; - struct nf_conntrack_l4proto *l4proto; - unsigned int *timeouts; int ret; if (!cda[CTA_TIMEOUT_L3PROTO] || @@ -454,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { - __u16 l3num; - __u8 l4num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; struct sk_buff *skb2; int ret, err; + __u16 l3num; + __u8 l4num; if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) return -EINVAL; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 623ef37de886f..5a152e2acfd58 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT typeof(nf_ct_timeout_find_get_hook) timeout_find_get; + const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; - struct nf_conntrack_l4proto *l4proto; int ret = 0; u8 proto; diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e3c4c6c3fef7f..2833635346473 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -579,8 +579,8 @@ static struct nf_conn * ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, u8 l3num, struct sk_buff *skb, bool natted) { - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; From c816c2558ed45e2db522ce8400fae82be6662415 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 15 Aug 2017 10:50:34 +0100 Subject: [PATCH 39/47] netfilter: ebtables: fix indent on if statements The returns on some if statements are not indented correctly, add in the missing tab. Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_ip.c | 4 ++-- net/bridge/netfilter/ebt_ip6.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index d06968bdf5ec7..2b46c50abce03 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -64,14 +64,14 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) if (NF_INVF(info, EBT_IP_DPORT, dst < info->dport[0] || dst > info->dport[1])) - return false; + return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); if (NF_INVF(info, EBT_IP_SPORT, src < info->sport[0] || src > info->sport[1])) - return false; + return false; } } return true; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 4617491be41e7..2a5a52a53ec43 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -89,7 +89,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) if (NF_INVF(info, EBT_IP6_SPORT, src < info->sport[0] || src > info->sport[1])) - return false; + return false; } if ((info->bitmask & EBT_IP6_ICMP6) && NF_INVF(info, EBT_IP6_ICMP6, From 5fd02ebe6537a7567f060ed36fb0dda64c5b63d8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 21 Aug 2017 16:19:26 +0200 Subject: [PATCH 40/47] netfilter: fix a few (harmless) sparse warnings net/netfilter/nft_payload.c:187:18: warning: incorrect type in return expression (expected bool got restricted __sum16 [usertype] check) net/netfilter/nft_exthdr.c:222:14: warning: cast to restricted __be32 net/netfilter/nft_rt.c:49:23: warning: incorrect type in assignment (different base types expected unsigned int got restricted __be32) net/netfilter/nft_rt.c:70:25: warning: symbol 'nft_rt_policy' was not declared. Should it be static? Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 2 +- net/netfilter/nft_payload.c | 2 +- net/netfilter/nft_rt.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index f5a0bf5e3bdd2..a0a93d987a3bd 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -388,7 +388,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG]) return ERR_PTR(-EOPNOTSUPP); - op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP])); + op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP])); switch (op) { case NFT_EXTHDR_OP_TCPOPT: if (tb[NFTA_EXTHDR_SREG]) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 7d699bbd45b0e..e110b0ebbf58b 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff) if (!uh) return false; - return uh->check; + return (__force bool)uh->check; } static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index e142e65d31761..61fd3acaa3c9f 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -82,8 +82,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr, if (nft_pf(pkt) != NFPROTO_IPV4) goto err; - *dest = rt_nexthop((const struct rtable *)dst, - ip_hdr(skb)->daddr); + *dest = (__force u32)rt_nexthop((const struct rtable *)dst, + ip_hdr(skb)->daddr); break; case NFT_RT_NEXTHOP6: if (nft_pf(pkt) != NFPROTO_IPV6) @@ -106,7 +106,7 @@ static void nft_rt_get_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } -const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { +static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { [NFTA_RT_DREG] = { .type = NLA_U32 }, [NFTA_RT_KEY] = { .type = NLA_U32 }, }; From 960632ece6949be1ab6f7a911faa4fa6e8305f4a Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Thu, 24 Aug 2017 00:08:32 +0200 Subject: [PATCH 41/47] netfilter: convert hook list to an array This converts the storage and layout of netfilter hook entries from a linked list to an array. After this commit, hook entries will be stored adjacent in memory. The next pointer is no longer required. The ops pointers are stored at the end of the array as they are only used in the register/unregister path and in the legacy br_netfilter code. nf_unregister_net_hooks() is slower than needed as it just calls nf_unregister_net_hook in a loop (i.e. at least n synchronize_net() calls), this will be addressed in followup patch. Test setup: - ixgbe 10gbit - netperf UDP_STREAM, 64 byte packets - 5 hooks: (raw + mangle prerouting, mangle+filter input, inet filter): empty mangle and raw prerouting, mangle and filter input hooks: 353.9 this patch: 364.2 Signed-off-by: Aaron Conole Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 45 ++--- include/linux/netfilter_ingress.h | 4 +- include/net/netfilter/nf_queue.h | 2 +- include/net/netns/netfilter.h | 2 +- net/bridge/br_netfilter_hooks.c | 19 +- net/netfilter/core.c | 297 ++++++++++++++++++++++-------- net/netfilter/nf_internals.h | 3 +- net/netfilter/nf_queue.c | 67 ++++--- 9 files changed, 307 insertions(+), 134 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 614642eb7eb7a..ca0a301273007 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1811,7 +1811,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct nf_hook_entry __rcu *nf_hooks_ingress; + struct nf_hook_entries __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 22f081065d496..f84bca1703cdc 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -72,25 +72,32 @@ struct nf_hook_ops { }; struct nf_hook_entry { - struct nf_hook_entry __rcu *next; nf_hookfn *hook; void *priv; - const struct nf_hook_ops *orig_ops; }; -static inline void -nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) -{ - entry->next = NULL; - entry->hook = ops->hook; - entry->priv = ops->priv; - entry->orig_ops = ops; -} +struct nf_hook_entries { + u16 num_hook_entries; + /* padding */ + struct nf_hook_entry hooks[]; + + /* trailer: pointers to original orig_ops of each hook. + * + * This is not part of struct nf_hook_entry since its only + * needed in slow path (hook register/unregister). + * + * const struct nf_hook_ops *orig_ops[] + */ +}; -static inline int -nf_hook_entry_priority(const struct nf_hook_entry *entry) +static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e) { - return entry->orig_ops->priority; + unsigned int n = e->num_hook_entries; + const void *hook_end; + + hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */ + + return (struct nf_hook_ops **)hook_end; } static inline int @@ -100,12 +107,6 @@ nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, return entry->hook(entry->priv, skb, state); } -static inline const struct nf_hook_ops * -nf_hook_entry_ops(const struct nf_hook_entry *entry) -{ - return entry->orig_ops; -} - static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, @@ -168,7 +169,7 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry *entry); + const struct nf_hook_entries *e, unsigned int i); /** * nf_hook - call a netfilter hook @@ -182,7 +183,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_entry *hook_head; + struct nf_hook_entries *hook_head; int ret = 1; #ifdef HAVE_JUMP_LABEL @@ -200,7 +201,7 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state, hook_head); + ret = nf_hook_slow(skb, &state, hook_head, 0); } rcu_read_unlock(); diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 59476061de868..8d5dae1e2ff85 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -17,7 +17,7 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) /* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { - struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); + struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; int ret; @@ -30,7 +30,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - ret = nf_hook_slow(skb, &state, e); + ret = nf_hook_slow(skb, &state, e, 0); if (ret == 0) return -1; diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 4454719ff849f..39468720fc192 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -10,9 +10,9 @@ struct nf_queue_entry { struct list_head list; struct sk_buff *skb; unsigned int id; + unsigned int hook_index; /* index in hook_entries->hook[] */ struct nf_hook_state state; - struct nf_hook_entry *hook; u16 size; /* sizeof(entry) + saved route keys */ /* extra space to store route keys */ diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index cea396b53a60d..72d66c8763d03 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,7 +16,7 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) bool defrag_ipv4; #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 626f4b2cef164..c2eea1b8737a1 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -985,22 +985,25 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_entry *elem; + const struct nf_hook_entries *e; struct nf_hook_state state; + struct nf_hook_ops **ops; + unsigned int i; int ret; - for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF; - elem = rcu_dereference(elem->next)) - ; - - if (!elem) + e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); + if (!e) return okfn(net, sk, skb); + ops = nf_hook_entries_get_hook_ops(e); + for (i = 0; i < e->num_hook_entries && + ops[i]->priority <= NF_BR_PRI_BRNF; i++) + ; + nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state, elem); + ret = nf_hook_slow(skb, &state, e, i); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 974cf2a3795aa..1a9e23c9ab98f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -62,10 +62,160 @@ EXPORT_SYMBOL(nf_hooks_needed); #endif static DEFINE_MUTEX(nf_hook_mutex); + +/* max hooks per family/hooknum */ +#define MAX_HOOK_COUNT 1024 + #define nf_entry_dereference(e) \ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) +static struct nf_hook_entries *allocate_hook_entries_size(u16 num) +{ + struct nf_hook_entries *e; + size_t alloc = sizeof(*e) + + sizeof(struct nf_hook_entry) * num + + sizeof(struct nf_hook_ops *) * num; + + if (num == 0) + return NULL; + + e = kvzalloc(alloc, GFP_KERNEL); + if (e) + e->num_hook_entries = num; + return e; +} + +static unsigned int accept_all(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */ +} + +static const struct nf_hook_ops dummy_ops = { + .hook = accept_all, + .priority = INT_MIN, +}; + +static struct nf_hook_entries * +nf_hook_entries_grow(const struct nf_hook_entries *old, + const struct nf_hook_ops *reg) +{ + unsigned int i, alloc_entries, nhooks, old_entries; + struct nf_hook_ops **orig_ops = NULL; + struct nf_hook_ops **new_ops; + struct nf_hook_entries *new; + bool inserted = false; + + alloc_entries = 1; + old_entries = old ? old->num_hook_entries : 0; + + if (old) { + orig_ops = nf_hook_entries_get_hook_ops(old); + + for (i = 0; i < old_entries; i++) { + if (orig_ops[i] != &dummy_ops) + alloc_entries++; + } + } + + if (alloc_entries > MAX_HOOK_COUNT) + return ERR_PTR(-E2BIG); + + new = allocate_hook_entries_size(alloc_entries); + if (!new) + return ERR_PTR(-ENOMEM); + + new_ops = nf_hook_entries_get_hook_ops(new); + + i = 0; + nhooks = 0; + while (i < old_entries) { + if (orig_ops[i] == &dummy_ops) { + ++i; + continue; + } + if (inserted || reg->priority > orig_ops[i]->priority) { + new_ops[nhooks] = (void *)orig_ops[i]; + new->hooks[nhooks] = old->hooks[i]; + i++; + } else { + new_ops[nhooks] = (void *)reg; + new->hooks[nhooks].hook = reg->hook; + new->hooks[nhooks].priv = reg->priv; + inserted = true; + } + nhooks++; + } + + if (!inserted) { + new_ops[nhooks] = (void *)reg; + new->hooks[nhooks].hook = reg->hook; + new->hooks[nhooks].priv = reg->priv; + } + + return new; +} + +/* + * __nf_hook_entries_try_shrink - try to shrink hook array + * + * @pp -- location of hook blob + * + * Hook unregistration must always succeed, so to-be-removed hooks + * are replaced by a dummy one that will just move to next hook. + * + * This counts the current dummy hooks, attempts to allocate new blob, + * copies the live hooks, then replaces and discards old one. + * + * return values: + * + * Returns address to free, or NULL. + */ +static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp) +{ + struct nf_hook_entries *old, *new = NULL; + unsigned int i, j, skip = 0, hook_entries; + struct nf_hook_ops **orig_ops; + struct nf_hook_ops **new_ops; + + old = nf_entry_dereference(*pp); + if (WARN_ON_ONCE(!old)) + return NULL; + + orig_ops = nf_hook_entries_get_hook_ops(old); + for (i = 0; i < old->num_hook_entries; i++) { + if (orig_ops[i] == &dummy_ops) + skip++; + } + + /* if skip == hook_entries all hooks have been removed */ + hook_entries = old->num_hook_entries; + if (skip == hook_entries) + goto out_assign; + + if (WARN_ON(skip == 0)) + return NULL; + + hook_entries -= skip; + new = allocate_hook_entries_size(hook_entries); + if (!new) + return NULL; + + new_ops = nf_hook_entries_get_hook_ops(new); + for (i = 0, j = 0; i < old->num_hook_entries; i++) { + if (orig_ops[i] == &dummy_ops) + continue; + new->hooks[j] = old->hooks[i]; + new_ops[j] = (void *)orig_ops[i]; + j++; + } +out_assign: + rcu_assign_pointer(*pp, new); + return old; +} + +static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) { if (reg->pf != NFPROTO_NETDEV) return net->nf.hooks[reg->pf]+reg->hooknum; @@ -76,13 +226,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st return ®->dev->nf_hooks_ingress; } #endif + WARN_ON_ONCE(1); return NULL; } int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry __rcu **pp; - struct nf_hook_entry *entry, *p; + struct nf_hook_entries *p, *new_hooks; + struct nf_hook_entries __rcu **pp; if (reg->pf == NFPROTO_NETDEV) { #ifndef CONFIG_NETFILTER_INGRESS @@ -98,23 +249,18 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) if (!pp) return -EINVAL; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - nf_hook_entry_init(entry, reg); - mutex_lock(&nf_hook_mutex); - /* Find the spot in the list */ - for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { - if (reg->priority < nf_hook_entry_priority(p)) - break; - } - rcu_assign_pointer(entry->next, p); - rcu_assign_pointer(*pp, entry); + p = nf_entry_dereference(*pp); + new_hooks = nf_hook_entries_grow(p, reg); + + if (!IS_ERR(new_hooks)) + rcu_assign_pointer(*pp, new_hooks); mutex_unlock(&nf_hook_mutex); + if (IS_ERR(new_hooks)) + return PTR_ERR(new_hooks); + #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); @@ -122,48 +268,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) #ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif + synchronize_net(); + BUG_ON(p == new_hooks); + kvfree(p); return 0; } EXPORT_SYMBOL(nf_register_net_hook); -static struct nf_hook_entry * -__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +/* + * __nf_unregister_net_hook - remove a hook from blob + * + * @oldp: current address of hook blob + * @unreg: hook to unregister + * + * This cannot fail, hook unregistration must always succeed. + * Therefore replace the to-be-removed hook with a dummy hook. + */ +static void __nf_unregister_net_hook(struct nf_hook_entries *old, + const struct nf_hook_ops *unreg) { - struct nf_hook_entry __rcu **pp; - struct nf_hook_entry *p; - - pp = nf_hook_entry_head(net, reg); - if (WARN_ON_ONCE(!pp)) - return NULL; + struct nf_hook_ops **orig_ops; + bool found = false; + unsigned int i; - mutex_lock(&nf_hook_mutex); - for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { - if (nf_hook_entry_ops(p) == reg) { - rcu_assign_pointer(*pp, p->next); - break; - } - } - mutex_unlock(&nf_hook_mutex); - if (!p) { - WARN(1, "nf_unregister_net_hook: hook not found!\n"); - return NULL; + orig_ops = nf_hook_entries_get_hook_ops(old); + for (i = 0; i < old->num_hook_entries; i++) { + if (orig_ops[i] != unreg) + continue; + WRITE_ONCE(old->hooks[i].hook, accept_all); + WRITE_ONCE(orig_ops[i], &dummy_ops); + found = true; + break; } + + if (found) { #ifdef CONFIG_NETFILTER_INGRESS - if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) - net_dec_ingress_queue(); + if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS) + net_dec_ingress_queue(); #endif #ifdef HAVE_JUMP_LABEL - static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]); #endif - - return p; + } else { + WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum); + } } void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg); + struct nf_hook_entries __rcu **pp; + struct nf_hook_entries *p; unsigned int nfq; + pp = nf_hook_entry_head(net, reg); + if (!pp) + return; + + mutex_lock(&nf_hook_mutex); + + p = nf_entry_dereference(*pp); + if (WARN_ON_ONCE(!p)) { + mutex_unlock(&nf_hook_mutex); + return; + } + + __nf_unregister_net_hook(p, reg); + + p = __nf_hook_entries_try_shrink(pp); + mutex_unlock(&nf_hook_mutex); if (!p) return; @@ -173,7 +345,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) nfq = nf_queue_nf_hook_drop(net); if (nfq) synchronize_net(); - kfree(p); + kvfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -200,46 +372,25 @@ EXPORT_SYMBOL(nf_register_net_hooks); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int hookcount) { - struct nf_hook_entry *to_free[16]; - unsigned int i, n, nfq; - - do { - n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free)); - - for (i = 0; i < n; i++) - to_free[i] = __nf_unregister_net_hook(net, ®[i]); - - synchronize_net(); - - /* need 2nd synchronize_net() if nfqueue is used, skb - * can get reinjected right before nf_queue_hook_drop() - */ - nfq = nf_queue_nf_hook_drop(net); - if (nfq) - synchronize_net(); - - for (i = 0; i < n; i++) - kfree(to_free[i]); + unsigned int i; - reg += n; - hookcount -= n; - } while (hookcount > 0); + for (i = 0; i < hookcount; i++) + nf_unregister_net_hook(net, ®[i]); } EXPORT_SYMBOL(nf_unregister_net_hooks); /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry *entry) + const struct nf_hook_entries *e, unsigned int s) { unsigned int verdict; int ret; - do { - verdict = nf_hook_entry_hookfn(entry, skb, state); + for (; s < e->num_hook_entries; s++) { + verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state); switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: - entry = rcu_dereference(entry->next); break; case NF_DROP: kfree_skb(skb); @@ -248,8 +399,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, ret = -EPERM; return ret; case NF_QUEUE: - ret = nf_queue(skb, state, &entry, verdict); - if (ret == 1 && entry) + ret = nf_queue(skb, state, e, s, verdict); + if (ret == 1) continue; return ret; default: @@ -258,7 +409,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, */ return 0; } - } while (entry); + } return 1; } diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 19f00a47a7108..bacd6363946e4 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,7 +13,8 @@ /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp, unsigned int verdict); + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict); unsigned int nf_queue_nf_hook_drop(struct net *net); /* nf_log.c */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4f4d80a58fb5f..f7e21953b1deb 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -112,7 +112,8 @@ unsigned int nf_queue_nf_hook_drop(struct net *net) EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop); static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, - struct nf_hook_entry *hook_entry, unsigned int queuenum) + const struct nf_hook_entries *entries, + unsigned int index, unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -140,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, - .hook = hook_entry, + .hook_index = index, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -163,18 +164,16 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, /* Packets leaving via this function must come back through nf_reinject(). */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp, unsigned int verdict) + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict) { - struct nf_hook_entry *entry = *entryp; int ret; - ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS); + ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { - *entryp = rcu_dereference(entry->next); + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) return 1; - } kfree_skb(skb); } @@ -183,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, static unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp) + const struct nf_hook_entries *hooks, + unsigned int *index) { - unsigned int verdict; + const struct nf_hook_entry *hook; + unsigned int verdict, i = *index; - do { + while (i < hooks->num_hook_entries) { + hook = &hooks->hooks[i]; repeat: - verdict = nf_hook_entry_hookfn((*entryp), skb, state); + verdict = nf_hook_entry_hookfn(hook, skb, state); if (verdict != NF_ACCEPT) { if (verdict != NF_REPEAT) return verdict; goto repeat; } - *entryp = rcu_dereference((*entryp)->next); - } while (*entryp); + i++; + } + *index = i; return NF_ACCEPT; } +/* Caller must hold rcu read-side lock */ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - struct nf_hook_entry *hook_entry = entry->hook; + const struct nf_hook_entry *hook_entry; + const struct nf_hook_entries *hooks; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; + const struct net *net; + unsigned int i; int err; + u8 pf; + + net = entry->state.net; + pf = entry->state.pf; + + hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]); nf_queue_entry_release_refs(entry); + i = entry->hook_index; + if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) { + kfree_skb(skb); + kfree(entry); + return; + } + + hook_entry = &hooks->hooks[i]; + /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); @@ -221,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) } if (verdict == NF_ACCEPT) { - hook_entry = rcu_dereference(hook_entry->next); - if (hook_entry) next_hook: - verdict = nf_iterate(skb, &entry->state, &hook_entry); + ++i; + verdict = nf_iterate(skb, &entry->state, hooks, &i); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: -okfn: local_bh_disable(); entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, &entry->state, &hook_entry, verdict); - if (err == 1) { - if (hook_entry) - goto next_hook; - goto okfn; - } + err = nf_queue(skb, &entry->state, hooks, i, verdict); + if (err == 1) + goto next_hook; break; case NF_STOLEN: break; From 2420b79f8c18a75ee2417cace381f4604b9b4365 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 23 Aug 2017 17:26:26 +0200 Subject: [PATCH 42/47] netfilter: debug: check for sorted array Make sure our grow/shrink routine places them in the correct order. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 1a9e23c9ab98f..164ad20d0bd25 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -157,6 +157,27 @@ nf_hook_entries_grow(const struct nf_hook_entries *old, return new; } +static void hooks_validate(const struct nf_hook_entries *hooks) +{ +#ifdef CONFIG_DEBUG_KERNEL + struct nf_hook_ops **orig_ops; + int prio = INT_MIN; + size_t i = 0; + + orig_ops = nf_hook_entries_get_hook_ops(hooks); + + for (i = 0; i < hooks->num_hook_entries; i++) { + if (orig_ops[i] == &dummy_ops) + continue; + + WARN_ON(orig_ops[i]->priority < prio); + + if (orig_ops[i]->priority > prio) + prio = orig_ops[i]->priority; + } +#endif +} + /* * __nf_hook_entries_try_shrink - try to shrink hook array * @@ -210,6 +231,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp) new_ops[j] = (void *)orig_ops[i]; j++; } + hooks_validate(new); out_assign: rcu_assign_pointer(*pp, new); return old; @@ -261,6 +283,7 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) if (IS_ERR(new_hooks)) return PTR_ERR(new_hooks); + hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); From d3ad2c17b4047747bcec074814c08b00795afc85 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 23 Aug 2017 17:26:27 +0200 Subject: [PATCH 43/47] netfilter: core: batch nf_unregister_net_hooks synchronize_net calls re-add batching in nf_unregister_net_hooks(). Similar as before, just store an array with to-be-free'd rule arrays on stack, then call synchronize_net once per batch. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 59 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 164ad20d0bd25..04fe25abc5f66 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -395,10 +395,63 @@ EXPORT_SYMBOL(nf_register_net_hooks); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int hookcount) { - unsigned int i; + struct nf_hook_entries *to_free[16], *p; + struct nf_hook_entries __rcu **pp; + unsigned int i, j, n; + + mutex_lock(&nf_hook_mutex); + for (i = 0; i < hookcount; i++) { + pp = nf_hook_entry_head(net, ®[i]); + if (!pp) + continue; + + p = nf_entry_dereference(*pp); + if (WARN_ON_ONCE(!p)) + continue; + __nf_unregister_net_hook(p, ®[i]); + } + mutex_unlock(&nf_hook_mutex); + + do { + n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free)); + + mutex_lock(&nf_hook_mutex); + + for (i = 0, j = 0; i < hookcount && j < n; i++) { + pp = nf_hook_entry_head(net, ®[i]); + if (!pp) + continue; + + p = nf_entry_dereference(*pp); + if (!p) + continue; + + to_free[j] = __nf_hook_entries_try_shrink(pp); + if (to_free[j]) + ++j; + } + + mutex_unlock(&nf_hook_mutex); + + if (j) { + unsigned int nfq; + + synchronize_net(); + + /* need 2nd synchronize_net() if nfqueue is used, skb + * can get reinjected right before nf_queue_hook_drop() + */ + nfq = nf_queue_nf_hook_drop(net); + if (nfq) + synchronize_net(); + + for (i = 0; i < j; i++) + kvfree(to_free[i]); + } - for (i = 0; i < hookcount; i++) - nf_unregister_net_hook(net, ®[i]); + reg += n; + hookcount -= n; + } while (hookcount > 0); } EXPORT_SYMBOL(nf_unregister_net_hooks); From e2f387d2df0ece6d4418bb09bef7802cfaf7142d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 25 Aug 2017 02:59:41 +0200 Subject: [PATCH 44/47] netfilter: conntrack: don't log "invalid" icmpv6 connections When enabling logging for invalid connections we currently also log most icmpv6 types, which we don't track intentionally (e.g. neigh discovery). "invalid" should really mean "invalid", i.e. short header or bad checksum. We don't do any logging for icmp(v4) either, its just useless noise. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 808f63e2e1ffe..43544b975eaee 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -121,11 +121,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); - if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) - nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, - NULL, NULL, - "nf_ct_icmpv6: invalid new with type %d ", - type + 128); return false; } return true; From b38cf90133e5b86c26633c1041c2cdc7360cb346 Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Mon, 28 Aug 2017 18:05:26 +0200 Subject: [PATCH 45/47] netfilter: Remove NFDEBUG() Remove NFDEBUG and use pr_debug() instead of it. Signed-off-by: Varsha Rao Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_internals.h | 6 ------ net/netfilter/nf_sockopt.c | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index bacd6363946e4..49f87ec093a39 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -5,12 +5,6 @@ #include #include -#ifdef CONFIG_NETFILTER_DEBUG -#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args) -#else -#define NFDEBUG(format, args...) -#endif - /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *entries, unsigned int index, diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index c68c1e58b3628..d2a9e6b5d01f7 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -33,7 +33,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg) reg->set_optmin, reg->set_optmax) || overlap(ops->get_optmin, ops->get_optmax, reg->get_optmin, reg->get_optmax))) { - NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", + pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", ops->set_optmin, ops->set_optmax, ops->get_optmin, ops->get_optmax, reg->set_optmin, reg->set_optmax, From d11dd6cdc3883f3c74f841f4d40dfe57c0b9756c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Mon, 28 Aug 2017 11:52:07 +0200 Subject: [PATCH 46/47] netfilter: conntrack: remove unused code in nf_conntrack_proto_generic.c L4 protocol helpers for DCCP, SCTP and UDPlite can't be built as kernel modules anymore, so we can remove code enclosed in #ifdef CONFIG_NF_CT_PROTO_{DCCP,SCTP,UDPLITE}_MODULE Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_generic.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 2bc3d0c1a5bf4..2993995b690db 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -17,21 +17,9 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; static bool nf_generic_should_process(u8 proto) { switch (proto) { -#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE - case IPPROTO_SCTP: - return false; -#endif -#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE - case IPPROTO_DCCP: - return false; -#endif #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE case IPPROTO_GRE: return false; -#endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE - case IPPROTO_UDPLITE: - return false; #endif default: return true; From 1aff64715edb8565e99337b842d814d636641b50 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Aug 2017 17:00:12 +0200 Subject: [PATCH 47/47] netfilter: rt: account for tcp header size too This needs to accout for the ipv4/ipv6 header size and the tcp header without options. Fixes: 6b5dc98e8fac0 ("netfilter: rt: add support to fetch path mss") Reported-by: Matteo Croce Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_rt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index 61fd3acaa3c9f..a6b7d05aeacf4 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -35,10 +35,11 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb switch (nft_pf(pkt)) { case NFPROTO_IPV4: fl.u.ip4.daddr = ip_hdr(skb)->saddr; - minlen = sizeof(struct iphdr); + minlen = sizeof(struct iphdr) + sizeof(struct tcphdr); break; case NFPROTO_IPV6: fl.u.ip6.daddr = ipv6_hdr(skb)->saddr; + minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr); break; }