From a53b42c11815d2357e31a9403ae3950517525894 Mon Sep 17 00:00:00 2001 From: Tan Hu Date: Wed, 25 Jul 2018 15:23:07 +0800 Subject: [PATCH 01/15] ipvs: fix race between ip_vs_conn_new() and ip_vs_del_dest() We came across infinite loop in ipvs when using ipvs in docker env. When ipvs receives new packets and cannot find an ipvs connection, it will create a new connection, then if the dest is unavailable (i.e. IP_VS_DEST_F_AVAILABLE), the packet will be dropped sliently. But if the dropped packet is the first packet of this connection, the connection control timer never has a chance to start and the ipvs connection cannot be released. This will lead to memory leak, or infinite loop in cleanup_net() when net namespace is released like this: ip_vs_conn_net_cleanup at ffffffffa0a9f31a [ip_vs] __ip_vs_cleanup at ffffffffa0a9f60a [ip_vs] ops_exit_list at ffffffff81567a49 cleanup_net at ffffffff81568b40 process_one_work at ffffffff810a851b worker_thread at ffffffff810a9356 kthread at ffffffff810b0b6f ret_from_fork at ffffffff81697a18 race condition: CPU1 CPU2 ip_vs_in() ip_vs_conn_new() ip_vs_del_dest() __ip_vs_unlink_dest() ~IP_VS_DEST_F_AVAILABLE cp->dest && !IP_VS_DEST_F_AVAILABLE __ip_vs_conn_put ... cleanup_net ---> infinite looping Fix this by checking whether the timer already started. Signed-off-by: Tan Hu Reviewed-by: Jiang Biao Acked-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_core.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 0679dd101e72a..7ca926a03b81c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1972,13 +1972,20 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (sysctl_expire_nodest_conn(ipvs)) { + __u32 flags = cp->flags; + + /* when timer already started, silently drop the packet.*/ + if (timer_pending(&cp->timer)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); + + if (sysctl_expire_nodest_conn(ipvs) && + !(flags & IP_VS_CONN_F_ONE_PACKET)) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } - /* don't restart its timer, and silently - drop the packet. */ - __ip_vs_conn_put(cp); + return NF_DROP; } From 14d32b2525dd3a21c29b0d9edf1748f5dabc1bbc Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 31 Jul 2018 18:03:32 +0200 Subject: [PATCH 02/15] jiffies: add utility function to calculate delta in ms add jiffies_delta_to_msecs() helper func to calculate the delta between two times and eventually 0 if negative. Suggested-by: Eric Dumazet Signed-off-by: Matteo Croce Reviewed-by: Eric Dumazet Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/linux/jiffies.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index a27cf66523279..fa928242567db 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -447,6 +447,11 @@ static inline clock_t jiffies_delta_to_clock_t(long delta) return jiffies_to_clock_t(max(0L, delta)); } +static inline unsigned int jiffies_delta_to_msecs(long delta) +{ + return jiffies_to_msecs(max(0L, delta)); +} + extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); From b71ed54dc2a1dbb4328f7d7c98d712747aee92ba Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 31 Jul 2018 18:03:33 +0200 Subject: [PATCH 03/15] ipvs: don't show negative times in ip_vs_conn Since commit 500462a9de65 ("timers: Switch to a non-cascading wheel"), timers duration can last even 12.5% more than the scheduled interval. IPVS has two handlers, /proc/net/ip_vs_conn and /proc/net/ip_vs_conn_sync, which shows the remaining time before that a connection expires. The default expire time for a connection is 60 seconds, and the expiration timer can fire even 4 seconds later than the scheduled time. The expiration time is calculated subtracting jiffies to the scheduled expiration time, and it's shown as a huge number when the timer fires late, since both values are unsigned. This can confuse script and tools which relies on it, like ipvsadm: root@mcroce-redhat:~# while ipvsadm -lc |grep SYN_RECV; do sleep 1 ; done TCP 00:05 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 00:04 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 00:03 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 00:02 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 00:01 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 00:00 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 68719476:44 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 68719476:43 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 68719476:42 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 68719476:41 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 68719476:40 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 TCP 68719476:39 SYN_RECV [fc00:1::1]:55732 [fc00:1::2]:8000 [fc00:2000::1]:8000 Signed-off-by: Matteo Croce Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 0edc62910ebfc..5b2b17867cb1b 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1117,24 +1117,28 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X " - "%s %04X %-11s %7lu%s\n", + "%s %04X %-11s %7u%s\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), dbuf, ntohs(cp->dport), ip_vs_state_name(cp), - (cp->timer.expires-jiffies)/HZ, pe_data); + jiffies_delta_to_msecs(cp->timer.expires - + jiffies) / 1000, + pe_data); else #endif seq_printf(seq, "%-3s %08X %04X %08X %04X" - " %s %04X %-11s %7lu%s\n", + " %s %04X %-11s %7u%s\n", ip_vs_proto_name(cp->protocol), ntohl(cp->caddr.ip), ntohs(cp->cport), ntohl(cp->vaddr.ip), ntohs(cp->vport), dbuf, ntohs(cp->dport), ip_vs_state_name(cp), - (cp->timer.expires-jiffies)/HZ, pe_data); + jiffies_delta_to_msecs(cp->timer.expires - + jiffies) / 1000, + pe_data); } return 0; } @@ -1179,26 +1183,28 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X " - "%s %04X %-11s %-6s %7lu\n", + "%s %04X %-11s %-6s %7u\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), dbuf, ntohs(cp->dport), ip_vs_state_name(cp), ip_vs_origin_name(cp->flags), - (cp->timer.expires-jiffies)/HZ); + jiffies_delta_to_msecs(cp->timer.expires - + jiffies) / 1000); else #endif seq_printf(seq, "%-3s %08X %04X %08X %04X " - "%s %04X %-11s %-6s %7lu\n", + "%s %04X %-11s %-6s %7u\n", ip_vs_proto_name(cp->protocol), ntohl(cp->caddr.ip), ntohs(cp->cport), ntohl(cp->vaddr.ip), ntohs(cp->vport), dbuf, ntohs(cp->dport), ip_vs_state_name(cp), ip_vs_origin_name(cp->flags), - (cp->timer.expires-jiffies)/HZ); + jiffies_delta_to_msecs(cp->timer.expires - + jiffies) / 1000); } return 0; } From da786717e0894886301ed2536843c13f9e8fd53e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 25 Jul 2018 21:38:43 +0200 Subject: [PATCH 04/15] netfilter: ip6t_rpfilter: set F_IFACE for linklocal addresses Roman reports that DHCPv6 client no longer sees replies from server due to ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP rule. We need to set the F_IFACE flag for linklocal addresses, they are scoped per-device. Fixes: 47b7e7f82802 ("netfilter: don't set F_IFACE on ipv6 fib lookups") Reported-by: Roman Mamedov Tested-by: Roman Mamedov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_rpfilter.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 0fe61ede77c67..c3c6b09acdc4f 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -26,6 +26,12 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr) return addr_type & IPV6_ADDR_UNICAST; } +static bool rpfilter_addr_linklocal(const struct in6_addr *addr) +{ + int addr_type = ipv6_addr_type(addr); + return addr_type & IPV6_ADDR_LINKLOCAL; +} + static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, const struct net_device *dev, u8 flags) { @@ -48,7 +54,11 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, } fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - if ((flags & XT_RPFILTER_LOOSE) == 0) + + if (rpfilter_addr_linklocal(&iph->saddr)) { + lookup_flags |= RT6_LOOKUP_F_IFACE; + fl6.flowi6_oif = dev->ifindex; + } else if ((flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); From 4ef360dd6a65f6ef337645e1b65e744034754b19 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 26 Jul 2018 00:39:51 +0900 Subject: [PATCH 05/15] netfilter: nft_set: fix allocation size overflow in privsize callback. In order to determine allocation size of set, ->privsize is invoked. At this point, both desc->size and size of each data structure of set are used. desc->size means number of element that is given by user. desc->size is u32 type. so that upperlimit of set element is 4294967295. but return type of ->privsize is also u32. hence overflow can occurred. test commands: %nft add table ip filter %nft add set ip filter hash1 { type ipv4_addr \; size 4294967295 \; } %nft list ruleset splat looks like: [ 1239.202910] kasan: CONFIG_KASAN_INLINE enabled [ 1239.208788] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 1239.217625] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 1239.219329] CPU: 0 PID: 1603 Comm: nft Not tainted 4.18.0-rc5+ #7 [ 1239.229091] RIP: 0010:nft_hash_walk+0x1d2/0x310 [nf_tables_set] [ 1239.229091] Code: 84 d2 7f 10 4c 89 e7 89 44 24 38 e8 d8 5a 17 e0 8b 44 24 38 48 8d 7b 10 41 0f b6 0c 24 48 89 fa 48 89 fe 48 c1 ea 03 83 e6 07 <42> 0f b6 14 3a 40 38 f2 7f 1a 84 d2 74 16 [ 1239.229091] RSP: 0018:ffff8801118cf358 EFLAGS: 00010246 [ 1239.229091] RAX: 0000000000000000 RBX: 0000000000020400 RCX: 0000000000000001 [ 1239.229091] RDX: 0000000000004082 RSI: 0000000000000000 RDI: 0000000000020410 [ 1239.229091] RBP: ffff880114d5a988 R08: 0000000000007e94 R09: ffff880114dd8030 [ 1239.229091] R10: ffff880114d5a988 R11: ffffed00229bb006 R12: ffff8801118cf4d0 [ 1239.229091] R13: ffff8801118cf4d8 R14: 0000000000000000 R15: dffffc0000000000 [ 1239.229091] FS: 00007f5a8fe0b700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 1239.229091] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1239.229091] CR2: 00007f5a8ecc27b0 CR3: 000000010608e000 CR4: 00000000001006f0 [ 1239.229091] Call Trace: [ 1239.229091] ? nft_hash_remove+0xf0/0xf0 [nf_tables_set] [ 1239.229091] ? memset+0x1f/0x40 [ 1239.229091] ? __nla_reserve+0x9f/0xb0 [ 1239.229091] ? memcpy+0x34/0x50 [ 1239.229091] nf_tables_dump_set+0x9a1/0xda0 [nf_tables] [ 1239.229091] ? __kmalloc_reserve.isra.29+0x2e/0xa0 [ 1239.229091] ? nft_chain_hash_obj+0x630/0x630 [nf_tables] [ 1239.229091] ? nf_tables_commit+0x2c60/0x2c60 [nf_tables] [ 1239.229091] netlink_dump+0x470/0xa20 [ 1239.229091] __netlink_dump_start+0x5ae/0x690 [ 1239.229091] nft_netlink_dump_start_rcu+0xd1/0x160 [nf_tables] [ 1239.229091] nf_tables_getsetelem+0x2e5/0x4b0 [nf_tables] [ 1239.229091] ? nft_get_set_elem+0x440/0x440 [nf_tables] [ 1239.229091] ? nft_chain_hash_obj+0x630/0x630 [nf_tables] [ 1239.229091] ? nf_tables_dump_obj_done+0x70/0x70 [nf_tables] [ 1239.229091] ? nla_parse+0xab/0x230 [ 1239.229091] ? nft_get_set_elem+0x440/0x440 [nf_tables] [ 1239.229091] nfnetlink_rcv_msg+0x7f0/0xab0 [nfnetlink] [ 1239.229091] ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink] [ 1239.229091] ? debug_show_all_locks+0x290/0x290 [ 1239.229091] ? sched_clock_cpu+0x132/0x170 [ 1239.229091] ? find_held_lock+0x39/0x1b0 [ 1239.229091] ? sched_clock_local+0x10d/0x130 [ 1239.229091] netlink_rcv_skb+0x211/0x320 [ 1239.229091] ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink] [ 1239.229091] ? netlink_ack+0x7b0/0x7b0 [ 1239.229091] ? ns_capable_common+0x6e/0x110 [ 1239.229091] nfnetlink_rcv+0x2d1/0x310 [nfnetlink] [ 1239.229091] ? nfnetlink_rcv_batch+0x10f0/0x10f0 [nfnetlink] [ 1239.229091] ? netlink_deliver_tap+0x829/0x930 [ 1239.229091] ? lock_acquire+0x265/0x2e0 [ 1239.229091] netlink_unicast+0x406/0x520 [ 1239.509725] ? netlink_attachskb+0x5b0/0x5b0 [ 1239.509725] ? find_held_lock+0x39/0x1b0 [ 1239.509725] netlink_sendmsg+0x987/0xa20 [ 1239.509725] ? netlink_unicast+0x520/0x520 [ 1239.509725] ? _copy_from_user+0xa9/0xc0 [ 1239.509725] __sys_sendto+0x21a/0x2c0 [ 1239.509725] ? __ia32_sys_getpeername+0xa0/0xa0 [ 1239.509725] ? retint_kernel+0x10/0x10 [ 1239.509725] ? sched_clock_cpu+0x132/0x170 [ 1239.509725] ? find_held_lock+0x39/0x1b0 [ 1239.509725] ? lock_downgrade+0x540/0x540 [ 1239.509725] ? up_read+0x1c/0x100 [ 1239.509725] ? __do_page_fault+0x763/0x970 [ 1239.509725] ? retint_user+0x18/0x18 [ 1239.509725] __x64_sys_sendto+0x177/0x180 [ 1239.509725] do_syscall_64+0xaa/0x360 [ 1239.509725] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1239.509725] RIP: 0033:0x7f5a8f468e03 [ 1239.509725] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb d0 0f 1f 84 00 00 00 00 00 83 3d 49 c9 2b 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 [ 1239.509725] RSP: 002b:00007ffd78d0b778 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 1239.509725] RAX: ffffffffffffffda RBX: 00007ffd78d0c890 RCX: 00007f5a8f468e03 [ 1239.509725] RDX: 0000000000000034 RSI: 00007ffd78d0b7e0 RDI: 0000000000000003 [ 1239.509725] RBP: 00007ffd78d0b7d0 R08: 00007f5a8f15c160 R09: 000000000000000c [ 1239.509725] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd78d0b7e0 [ 1239.509725] R13: 0000000000000034 R14: 00007f5a8f9aff60 R15: 00005648040094b0 [ 1239.509725] Modules linked in: nf_tables_set nf_tables nfnetlink ip_tables x_tables [ 1239.670713] ---[ end trace 39375adcda140f11 ]--- [ 1239.676016] RIP: 0010:nft_hash_walk+0x1d2/0x310 [nf_tables_set] [ 1239.682834] Code: 84 d2 7f 10 4c 89 e7 89 44 24 38 e8 d8 5a 17 e0 8b 44 24 38 48 8d 7b 10 41 0f b6 0c 24 48 89 fa 48 89 fe 48 c1 ea 03 83 e6 07 <42> 0f b6 14 3a 40 38 f2 7f 1a 84 d2 74 16 [ 1239.705108] RSP: 0018:ffff8801118cf358 EFLAGS: 00010246 [ 1239.711115] RAX: 0000000000000000 RBX: 0000000000020400 RCX: 0000000000000001 [ 1239.719269] RDX: 0000000000004082 RSI: 0000000000000000 RDI: 0000000000020410 [ 1239.727401] RBP: ffff880114d5a988 R08: 0000000000007e94 R09: ffff880114dd8030 [ 1239.735530] R10: ffff880114d5a988 R11: ffffed00229bb006 R12: ffff8801118cf4d0 [ 1239.743658] R13: ffff8801118cf4d8 R14: 0000000000000000 R15: dffffc0000000000 [ 1239.751785] FS: 00007f5a8fe0b700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 1239.760993] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1239.767560] CR2: 00007f5a8ecc27b0 CR3: 000000010608e000 CR4: 00000000001006f0 [ 1239.775679] Kernel panic - not syncing: Fatal exception [ 1239.776630] Kernel Offset: 0x1f000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 1239.776630] Rebooting in 5 seconds.. Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nf_tables_api.c | 2 +- net/netfilter/nft_set_bitmap.c | 6 +++--- net/netfilter/nft_set_hash.c | 8 ++++---- net/netfilter/nft_set_rbtree.c | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index dc417ef0a0c50..552bfbef1bf1b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -274,7 +274,7 @@ enum nft_set_class { * @space: memory class */ struct nft_set_estimate { - unsigned int size; + u64 size; enum nft_set_class lookup; enum nft_set_class space; }; @@ -336,7 +336,7 @@ struct nft_set_ops { const struct nft_set_elem *elem, unsigned int flags); - unsigned int (*privsize)(const struct nlattr * const nla[], + u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 67cdd5c4f4f5d..3008f93469c49 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3354,7 +3354,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nft_set *set; struct nft_ctx ctx; char *name; - unsigned int size; + u64 size; u64 timeout; u32 ktype, dtype, flags, policy, gc_int, objtype; struct nft_set_desc desc; diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 128bc16f52dd4..f866bd41e5d2b 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -248,13 +248,13 @@ static inline u32 nft_bitmap_size(u32 klen) return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1; } -static inline u32 nft_bitmap_total_size(u32 klen) +static inline u64 nft_bitmap_total_size(u32 klen) { return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); } -static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_bitmap_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 90c3e7e6cacba..015124e649cbd 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -341,8 +341,8 @@ static void nft_rhash_gc(struct work_struct *work) nft_set_gc_interval(set)); } -static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_rhash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rhash); } @@ -585,8 +585,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set, } } -static unsigned int nft_hash_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_hash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_hash) + nft_hash_buckets(desc->size) * sizeof(struct hlist_head); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 9873d734b4948..55e2d9215c0d4 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -411,8 +411,8 @@ static void nft_rbtree_gc(struct work_struct *work) nft_set_gc_interval(set)); } -static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_rbtree_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rbtree); } From 3e673b23b541b8e7f773b2d378d6eb99831741cd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 31 Jul 2018 13:41:23 +0200 Subject: [PATCH 06/15] netfilter: fix memory leaks on netlink_dump_start error Shaochun Chen points out we leak dumper filter state allocations stored in dump_control->data in case there is an error before netlink sets cb_running (after which ->done will be called at some point). In order to fix this, add .start functions and move allocations there. Same pattern as used in commit 90fd131afc565159c9e0ea742f082b337e10f8c6 ("netfilter: nf_tables: move dumper state allocation into ->start"). Reported-by: shaochun chen Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 26 ++++++++++++++++--------- net/netfilter/nfnetlink_acct.c | 29 +++++++++++++--------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index f981bfa8db72e..036207ecaf166 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -846,6 +846,21 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[]) #endif } +static int ctnetlink_start(struct netlink_callback *cb) +{ + const struct nlattr * const *cda = cb->data; + struct ctnetlink_filter *filter = NULL; + + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { + filter = ctnetlink_alloc_filter(cda); + if (IS_ERR(filter)) + return PTR_ERR(filter); + } + + cb->data = filter; + return 0; +} + static int ctnetlink_filter_match(struct nf_conn *ct, void *data) { struct ctnetlink_filter *filter = data; @@ -1290,19 +1305,12 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { + .start = ctnetlink_start, .dump = ctnetlink_dump_table, .done = ctnetlink_done, + .data = (void *)cda, }; - if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { - struct ctnetlink_filter *filter; - - filter = ctnetlink_alloc_filter(cda); - if (IS_ERR(filter)) - return PTR_ERR(filter); - - c.data = filter; - } return netlink_dump_start(ctnl, skb, nlh, &c); } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index a0e5adf0b3b6d..8fa8bf7c48e64 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -238,29 +238,33 @@ static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = { [NFACCT_FILTER_VALUE] = { .type = NLA_U32 }, }; -static struct nfacct_filter * -nfacct_filter_alloc(const struct nlattr * const attr) +static int nfnl_acct_start(struct netlink_callback *cb) { - struct nfacct_filter *filter; + const struct nlattr *const attr = cb->data; struct nlattr *tb[NFACCT_FILTER_MAX + 1]; + struct nfacct_filter *filter; int err; + if (!attr) + return 0; + err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy, NULL); if (err < 0) - return ERR_PTR(err); + return err; if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE]) - return ERR_PTR(-EINVAL); + return -EINVAL; filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL); if (!filter) - return ERR_PTR(-ENOMEM); + return -ENOMEM; filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK])); filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE])); + cb->data = filter; - return filter; + return 0; } static int nfnl_acct_get(struct net *net, struct sock *nfnl, @@ -275,18 +279,11 @@ static int nfnl_acct_get(struct net *net, struct sock *nfnl, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nfnl_acct_dump, + .start = nfnl_acct_start, .done = nfnl_acct_done, + .data = (void *)tb[NFACCT_FILTER], }; - if (tb[NFACCT_FILTER]) { - struct nfacct_filter *filter; - - filter = nfacct_filter_alloc(tb[NFACCT_FILTER]); - if (IS_ERR(filter)) - return PTR_ERR(filter); - - c.data = filter; - } return netlink_dump_start(nfnl, skb, nlh, &c); } From d209df3e7f7002d9099fdb0f6df0f972b4386a63 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Aug 2018 21:44:40 +0200 Subject: [PATCH 07/15] netfilter: nf_tables: fix register ordering We must register nfnetlink ops last, as that exposes nf_tables to userspace. Without this, we could theoretically get nfnetlink request before net->nft state has been initialized. Fixes: 99633ab29b213 ("netfilter: nf_tables: complete net namespace support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 29 ++++++++++++++++++++++------- net/netfilter/nft_chain_filter.c | 2 +- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 552bfbef1bf1b..0f39ac487012c 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1374,6 +1374,6 @@ struct nft_trans_flowtable { (((struct nft_trans_flowtable *)trans->data)->flowtable) int __init nft_chain_filter_init(void); -void __exit nft_chain_filter_fini(void); +void nft_chain_filter_fini(void); #endif /* _NET_NF_TABLES_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3008f93469c49..80636cc596868 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7273,21 +7273,36 @@ static int __init nf_tables_module_init(void) { int err; - nft_chain_filter_init(); + err = register_pernet_subsys(&nf_tables_net_ops); + if (err < 0) + return err; + + err = nft_chain_filter_init(); + if (err < 0) + goto err1; err = nf_tables_core_module_init(); if (err < 0) - return err; + goto err2; - err = nfnetlink_subsys_register(&nf_tables_subsys); + err = register_netdevice_notifier(&nf_tables_flowtable_notifier); if (err < 0) - goto err; + goto err3; - register_netdevice_notifier(&nf_tables_flowtable_notifier); + /* must be last */ + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err4; - return register_pernet_subsys(&nf_tables_net_ops); -err: + return err; +err4: + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); +err3: nf_tables_core_module_exit(); +err2: + nft_chain_filter_fini(); +err1: + unregister_pernet_subsys(&nf_tables_net_ops); return err; } diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index ea5b7c4944f69..9d07b277b9eec 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -392,7 +392,7 @@ int __init nft_chain_filter_init(void) return 0; } -void __exit nft_chain_filter_fini(void) +void nft_chain_filter_fini(void) { nft_chain_filter_bridge_fini(); nft_chain_filter_inet_fini(); From 6a48de0144767f2c6880540c0a4ac6741e3c440b Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Aug 2018 21:44:41 +0200 Subject: [PATCH 08/15] netfilter: nf_tables: don't prevent event handler from device cleanup on netns exit When a netnsamespace exits, the nf_tables pernet_ops will remove all rules. However, there is one caveat: Base chains that register ingress hooks will cause use-after-free: device is already gone at that point. The device event handlers prevent this from happening: netns exit synthesizes unregister events for all devices. However, an improper fix for a race condition made the notifiers a no-op in case they get called from netns exit path, so revert that part. This is safe now as the previous patch fixed nf_tables pernet ops and device notifier initialisation ordering. Fixes: 0a2cf5ee432c2 ("netfilter: nf_tables: close race between netns exit and rmmod") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 ++----- net/netfilter/nft_chain_filter.c | 12 +++++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 80636cc596868..1dca5683f59f1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5925,10 +5925,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this, if (event != NETDEV_UNREGISTER) return 0; - net = maybe_get_net(dev_net(dev)); - if (!net) - return 0; - + net = dev_net(dev); mutex_lock(&net->nft.commit_mutex); list_for_each_entry(table, &net->nft.tables, list) { list_for_each_entry(flowtable, &table->flowtables, list) { @@ -5936,7 +5933,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this, } } mutex_unlock(&net->nft.commit_mutex); - put_net(net); + return NOTIFY_DONE; } diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 9d07b277b9eec..3fd540b2c6baf 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -293,6 +293,13 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev, if (strcmp(basechain->dev_name, dev->name) != 0) return; + /* UNREGISTER events are also happpening on netns exit. + * + * Altough nf_tables core releases all tables/chains, only + * this event handler provides guarantee that + * basechain.ops->dev is still accessible, so we cannot + * skip exiting net namespaces. + */ __nft_release_basechain(ctx); break; case NETDEV_CHANGENAME: @@ -318,10 +325,6 @@ static int nf_tables_netdev_event(struct notifier_block *this, event != NETDEV_CHANGENAME) return NOTIFY_DONE; - ctx.net = maybe_get_net(ctx.net); - if (!ctx.net) - return NOTIFY_DONE; - mutex_lock(&ctx.net->nft.commit_mutex); list_for_each_entry(table, &ctx.net->nft.tables, list) { if (table->family != NFPROTO_NETDEV) @@ -338,7 +341,6 @@ static int nf_tables_netdev_event(struct notifier_block *this, } } mutex_unlock(&ctx.net->nft.commit_mutex); - put_net(ctx.net); return NOTIFY_DONE; } From 1c117d3b721a2678020d522ff1bf33984d4f0a5a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Aug 2018 12:30:09 +0200 Subject: [PATCH 09/15] netfilter: conntrack: fix removal of conntrack entries when l4tracker is removed nf_ct_l4proto_unregister_one() leaves conntracks added by to-be-removed tracker behind, nf_ct_l4proto_unregister has to iterate for each protocol to be removed. v2: call nf_ct_iterate_destroy without holding nf_ct_proto_mutex. Fixes: 2c41f33c1b703 ("netfilter: move table iteration out of netns exit paths") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 30070732ee50a..9f14b0df6960d 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -312,7 +312,9 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) __nf_ct_l4proto_unregister_one(l4proto); mutex_unlock(&nf_ct_proto_mutex); - synchronize_rcu(); + synchronize_net(); + /* Remove all contrack entries for this protocol */ + nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); } EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); @@ -333,14 +335,17 @@ static void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], unsigned int num_proto) { + int i; + mutex_lock(&nf_ct_proto_mutex); - while (num_proto-- != 0) - __nf_ct_l4proto_unregister_one(l4proto[num_proto]); + for (i = 0; i < num_proto; i++) + __nf_ct_l4proto_unregister_one(l4proto[i]); mutex_unlock(&nf_ct_proto_mutex); synchronize_net(); - /* Remove all contrack entries for this protocol */ - nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); + + for (i = 0; i < num_proto; i++) + nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]); } static int From a148ce15375fc664ad64762c751c0c2aecb2cafe Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Tue, 7 Aug 2018 21:54:00 +0200 Subject: [PATCH 10/15] netfilter: x_tables: do not fail xt_alloc_table_info too easilly eacd86ca3b03 ("net/netfilter/x_tables.c: use kvmalloc() in xt_alloc_table_info()") has unintentionally fortified xt_alloc_table_info allocation when __GFP_RETRY has been dropped from the vmalloc fallback. Later on there was a syzbot report that this can lead to OOM killer invocations when tables are too large and 0537250fdc6c ("netfilter: x_tables: make allocation less aggressive") has been merged to restore the original behavior. Georgi Nikolov however noticed that he is not able to install his iptables anymore so this can be seen as a regression. The primary argument for 0537250fdc6c was that this allocation path shouldn't really trigger the OOM killer and kill innocent tasks. On the other hand the interface requires root and as such should allow what the admin asks for. Root inside a namespaces makes this more complicated because those might be not trusted in general. If they are not then such namespaces should be restricted anyway. Therefore drop the __GFP_NORETRY and replace it by __GFP_ACCOUNT to enfore memcg constrains on it. Fixes: 0537250fdc6c ("netfilter: x_tables: make allocation less aggressive") Reported-by: Georgi Nikolov Suggested-by: Vlastimil Babka Acked-by: Florian Westphal Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index d0d8397c95889..aecadd471e1d0 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1178,12 +1178,7 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz < sizeof(*info) || sz >= XT_MAX_TABLE_SIZE) return NULL; - /* __GFP_NORETRY is not fully supported by kvmalloc but it should - * work reasonably well if sz is too large and bail out rather - * than shoot all processes down before realizing there is nothing - * more to reclaim. - */ - info = kvmalloc(sz, GFP_KERNEL | __GFP_NORETRY); + info = kvmalloc(sz, GFP_KERNEL_ACCOUNT); if (!info) return NULL; From 1bfc2bc7ad8ac045045e99e33aa1ba727d2d291d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Eckl?= Date: Fri, 10 Aug 2018 12:58:39 +0200 Subject: [PATCH 11/15] netfilter: doc: Add nf_tables part in tproxy.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recently, transparent proxy support has been added to nf_tables so that this document should be updated with the new information. - Nft commands are added as alternatives to iptables ones. - The link for a patched iptables is removed as it is already part of the mainline iptables implementation (and the link is dead). - tcprdr is added as an example implementation of a transparent proxy Cc: "David S. Miller" Cc: Jonathan Corbet Cc: Florian Westphal Cc: KOVACS Krisztian Cc: Pablo Neira Ayuso Cc: linux-doc@vger.kernel.org Signed-off-by: Máté Eckl Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/tproxy.txt | 34 +++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/tproxy.txt b/Documentation/networking/tproxy.txt index ec11429e1d42a..b9a188823d9fc 100644 --- a/Documentation/networking/tproxy.txt +++ b/Documentation/networking/tproxy.txt @@ -5,19 +5,28 @@ This feature adds Linux 2.2-like transparent proxy support to current kernels. To use it, enable the socket match and the TPROXY target in your kernel config. You will need policy routing too, so be sure to enable that as well. +From Linux 4.18 transparent proxy support is also available in nf_tables. 1. Making non-local sockets work ================================ The idea is that you identify packets with destination address matching a local -socket on your box, set the packet mark to a certain value, and then match on that -value using policy routing to have those packets delivered locally: +socket on your box, set the packet mark to a certain value: # iptables -t mangle -N DIVERT # iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT # iptables -t mangle -A DIVERT -j MARK --set-mark 1 # iptables -t mangle -A DIVERT -j ACCEPT +Alternatively you can do this in nft with the following commands: + +# nft add table filter +# nft add chain filter divert "{ type filter hook prerouting priority -150; }" +# nft add rule filter divert meta l4proto tcp socket transparent 1 meta mark set 1 accept + +And then match on that value using policy routing to have those packets +delivered locally: + # ip rule add fwmark 1 lookup 100 # ip route add local 0.0.0.0/0 dev lo table 100 @@ -57,17 +66,28 @@ add rules like this to the iptables ruleset above: # iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \ --tproxy-mark 0x1/0x1 --on-port 50080 +Or the following rule to nft: + +# nft add rule filter divert tcp dport 80 tproxy to :50080 meta mark set 1 accept + Note that for this to work you'll have to modify the proxy to enable (SOL_IP, IP_TRANSPARENT) for the listening socket. +As an example implementation, tcprdr is available here: +https://git.breakpoint.cc/cgit/fw/tcprdr.git/ +This tool is written by Florian Westphal and it was used for testing during the +nf_tables implementation. -3. Iptables extensions -====================== +3. Iptables and nf_tables extensions +==================================== -To use tproxy you'll need to have the 'socket' and 'TPROXY' modules -compiled for iptables. A patched version of iptables is available -here: http://git.balabit.hu/?p=bazsi/iptables-tproxy.git +To use tproxy you'll need to have the following modules compiled for iptables: + - NETFILTER_XT_MATCH_SOCKET + - NETFILTER_XT_TARGET_TPROXY +Or the floowing modules for nf_tables: + - NFT_SOCKET + - NFT_TPROXY 4. Application support ====================== From 3206c516ce4e4b56d7b99341814c261ec190f6df Mon Sep 17 00:00:00 2001 From: Harsha Sharma Date: Fri, 10 Aug 2018 22:52:37 +0530 Subject: [PATCH 12/15] netfilter: nft_ct: make l3 protocol field optional for timeout object If l3 protocol value is not specified for ct timeout object then use the value from nft_ctx protocol family. Signed-off-by: Harsha Sharma Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 4855d4ce1c8f8..26a8baebd0722 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -832,12 +832,13 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx, __u8 l4num; int ret; - if (!tb[NFTA_CT_TIMEOUT_L3PROTO] || - !tb[NFTA_CT_TIMEOUT_L4PROTO] || + if (!tb[NFTA_CT_TIMEOUT_L4PROTO] || !tb[NFTA_CT_TIMEOUT_DATA]) return -EINVAL; - l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO])); + if (tb[NFTA_CT_TIMEOUT_L3PROTO]) + l3num = ntohs(nla_get_be16(tb[NFTA_CT_TIMEOUT_L3PROTO])); + l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]); priv->l4proto = l4num; From cdb2f401246ef70b37eeedd9c6f9c28d611d8255 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 13 Aug 2018 18:43:36 +0300 Subject: [PATCH 13/15] netfilter: uapi: fix linux/netfilter/nf_osf.h userspace compilation errors Move inclusion of and from linux/netfilter/xt_osf.h to linux/netfilter/nf_osf.h to fix the following linux/netfilter/nf_osf.h userspace compilation errors: /usr/include/linux/netfilter/nf_osf.h:59:24: error: 'MAX_IPOPTLEN' undeclared here (not in a function) struct nf_osf_opt opt[MAX_IPOPTLEN]; /usr/include/linux/netfilter/nf_osf.h:64:17: error: field 'ip' has incomplete type struct iphdr ip; /usr/include/linux/netfilter/nf_osf.h:65:18: error: field 'tcp' has incomplete type struct tcphdr tcp; Fixes: bfb15f2a95cb ("netfilter: extract Passive OS fingerprint infrastructure from xt_osf") Signed-off-by: Dmitry V. Levin Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_osf.h | 2 ++ include/uapi/linux/netfilter/xt_osf.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h index 76a3527df5dda..272bc3195f2dd 100644 --- a/include/uapi/linux/netfilter/nfnetlink_osf.h +++ b/include/uapi/linux/netfilter/nfnetlink_osf.h @@ -2,6 +2,8 @@ #define _NF_OSF_H #include +#include +#include #define MAXGENRELEN 32 diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h index 24102b5286ecb..6e466236ca4b2 100644 --- a/include/uapi/linux/netfilter/xt_osf.h +++ b/include/uapi/linux/netfilter/xt_osf.h @@ -21,8 +21,6 @@ #define _XT_OSF_H #include -#include -#include #include #define XT_OSF_GENRE NF_OSF_GENRE From 90d827f06bebbd9aded00c152e6c9eb2db4db1a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A1t=C3=A9=20Eckl?= Date: Tue, 14 Aug 2018 22:09:10 +0200 Subject: [PATCH 14/15] netfilter: nft_tproxy: Fix missing-braces warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a warning reported by the kbuild test robot (from linux-next tree): net/netfilter/nft_tproxy.c: In function 'nft_tproxy_eval_v6': >> net/netfilter/nft_tproxy.c:85:9: warning: missing braces around initializer [-Wmissing-braces] struct in6_addr taddr = {0}; ^ net/netfilter/nft_tproxy.c:85:9: warning: (near initialization for 'taddr.in6_u') [-Wmissing-braces] This warning is actually caused by a gcc bug already resolved in newer versions (kbuild used 4.9) so this kind of initialization is omitted and memset is used instead. Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") Signed-off-by: Máté Eckl Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_tproxy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index eff99dffc842d..f92a82c738808 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -82,13 +82,15 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr, const struct nft_tproxy *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; const struct ipv6hdr *iph = ipv6_hdr(skb); - struct in6_addr taddr = {0}; + struct in6_addr taddr; int thoff = pkt->xt.thoff; struct udphdr _hdr, *hp; __be16 tport = 0; struct sock *sk; int l4proto; + memset(&taddr, 0, sizeof(taddr)); + if (!pkt->tprot_set) { regs->verdict.code = NFT_BREAK; return; From feb9f55c33e5114127238a2c87c069b4f30d1f23 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 15 Aug 2018 15:37:23 +0200 Subject: [PATCH 15/15] netfilter: nft_dynset: allow dynamic updates of non-anonymous set This check is superfluous since it breaks valid configurations, remove it. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 81184c244d1a9..6e91a37d57f27 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -187,8 +187,6 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_EXPR] != NULL) { if (!(set->flags & NFT_SET_EVAL)) return -EINVAL; - if (!nft_set_is_anonymous(set)) - return -EOPNOTSUPP; priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); if (IS_ERR(priv->expr))