Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter/IPVS fixes for net

The following patchset contains Netfilter/IPVS fixes for your net tree:

1) Infinite loop in IPVS when net namespace is released, from
   Tan Hu.

2) Do not show negative timeouts in ip_vs_conn by using the new
   jiffies_delta_to_msecs(), patches from Matteo Croce.

3) Set F_IFACE flag for linklocal addresses in ip6t_rpfilter,
   from Florian Westphal.

4) Fix overflow in set size allocation, from Taehee Yoo.

5) Use netlink_dump_start() from ctnetlink to fix memleak from
   the error path, again from Florian.

6) Register nfnetlink_subsys in last place, otherwise netns
   init path may lose race and see net->nft uninitialized data.
   This also reverts the previous attempt to fix this by increasing
   the netns refcount, patches from Florian.

7) Remove conntrack entries on layer 4 protocol tracker module
   removal, from Florian.

8) Use GFP_KERNEL_ACCOUNT for xtables blob allocation, from
   Michal Hocko.

9) Get tproxy documentation in sync with existing codebase,
   from Mate Eckl.

10) Honor preset layer 3 protocol via ctx->family in the new nft_ct
    timeout infrastructure, from Harsha Sharma.

11) Let uapi nfnetlink_osf.h compile standalone with no errors,
    from Dmitry V. Levin.

12) Missing braces compilation warning in nft_tproxy, patch from
    Mate Eckl.

13) Disregard bogus check to bail out on non-anonymous sets from
    the dynamic set update extension.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 18, 2018
2 parents bfdd19a + feb9f55 commit 3fe49d6
Show file tree
Hide file tree
Showing 20 changed files with 163 additions and 95 deletions.
34 changes: 27 additions & 7 deletions Documentation/networking/tproxy.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,28 @@ This feature adds Linux 2.2-like transparent proxy support to current kernels.
To use it, enable the socket match and the TPROXY target in your kernel config.
You will need policy routing too, so be sure to enable that as well.

From Linux 4.18 transparent proxy support is also available in nf_tables.

1. Making non-local sockets work
================================

The idea is that you identify packets with destination address matching a local
socket on your box, set the packet mark to a certain value, and then match on that
value using policy routing to have those packets delivered locally:
socket on your box, set the packet mark to a certain value:

# iptables -t mangle -N DIVERT
# iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT
# iptables -t mangle -A DIVERT -j MARK --set-mark 1
# iptables -t mangle -A DIVERT -j ACCEPT

Alternatively you can do this in nft with the following commands:

# nft add table filter
# nft add chain filter divert "{ type filter hook prerouting priority -150; }"
# nft add rule filter divert meta l4proto tcp socket transparent 1 meta mark set 1 accept

And then match on that value using policy routing to have those packets
delivered locally:

# ip rule add fwmark 1 lookup 100
# ip route add local 0.0.0.0/0 dev lo table 100

Expand Down Expand Up @@ -57,17 +66,28 @@ add rules like this to the iptables ruleset above:
# iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \
--tproxy-mark 0x1/0x1 --on-port 50080

Or the following rule to nft:

# nft add rule filter divert tcp dport 80 tproxy to :50080 meta mark set 1 accept

Note that for this to work you'll have to modify the proxy to enable (SOL_IP,
IP_TRANSPARENT) for the listening socket.

As an example implementation, tcprdr is available here:
https://git.breakpoint.cc/cgit/fw/tcprdr.git/
This tool was written by Florian Westphal and was used for testing during the
nf_tables implementation.

3. Iptables extensions
======================
3. Iptables and nf_tables extensions
====================================

To use tproxy you'll need to have the 'socket' and 'TPROXY' modules
compiled for iptables. A patched version of iptables is available
here: http://git.balabit.hu/?p=bazsi/iptables-tproxy.git
To use tproxy you'll need to have the following modules compiled for iptables:
- NETFILTER_XT_MATCH_SOCKET
- NETFILTER_XT_TARGET_TPROXY

Or the following modules for nf_tables:
- NFT_SOCKET
- NFT_TPROXY

4. Application support
======================
Expand Down
5 changes: 5 additions & 0 deletions include/linux/jiffies.h
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,11 @@ static inline clock_t jiffies_delta_to_clock_t(long delta)
return jiffies_to_clock_t(max(0L, delta));
}

/*
 * Clamp a signed jiffies delta at zero and hand the result to
 * jiffies_to_msecs(), so a negative delta yields the same value as zero.
 */
static inline unsigned int jiffies_delta_to_msecs(long delta)
{
	long clamped = max(0L, delta);

	return jiffies_to_msecs(clamped);
}

extern unsigned long clock_t_to_jiffies(unsigned long x);
extern u64 jiffies_64_to_clock_t(u64 x);
extern u64 nsec_to_clock_t(u64 x);
Expand Down
6 changes: 3 additions & 3 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ enum nft_set_class {
* @space: memory class
*/
struct nft_set_estimate {
unsigned int size;
u64 size;
enum nft_set_class lookup;
enum nft_set_class space;
};
Expand Down Expand Up @@ -336,7 +336,7 @@ struct nft_set_ops {
const struct nft_set_elem *elem,
unsigned int flags);

unsigned int (*privsize)(const struct nlattr * const nla[],
u64 (*privsize)(const struct nlattr * const nla[],
const struct nft_set_desc *desc);
bool (*estimate)(const struct nft_set_desc *desc,
u32 features,
Expand Down Expand Up @@ -1374,6 +1374,6 @@ struct nft_trans_flowtable {
(((struct nft_trans_flowtable *)trans->data)->flowtable)

int __init nft_chain_filter_init(void);
void __exit nft_chain_filter_fini(void);
void nft_chain_filter_fini(void);

#endif /* _NET_NF_TABLES_H */
2 changes: 2 additions & 0 deletions include/uapi/linux/netfilter/nfnetlink_osf.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#define _NF_OSF_H

#include <linux/types.h>
#include <linux/ip.h>
#include <linux/tcp.h>

#define MAXGENRELEN 32

Expand Down
2 changes: 0 additions & 2 deletions include/uapi/linux/netfilter/xt_osf.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#define _XT_OSF_H

#include <linux/types.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/netfilter/nfnetlink_osf.h>

#define XT_OSF_GENRE NF_OSF_GENRE
Expand Down
12 changes: 11 additions & 1 deletion net/ipv6/netfilter/ip6t_rpfilter.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr)
return addr_type & IPV6_ADDR_UNICAST;
}

/* True when ipv6_addr_type() reports the IPV6_ADDR_LINKLOCAL bit for @addr. */
static bool rpfilter_addr_linklocal(const struct in6_addr *addr)
{
	return (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL) != 0;
}

static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
const struct net_device *dev, u8 flags)
{
Expand All @@ -48,7 +54,11 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
}

fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
if ((flags & XT_RPFILTER_LOOSE) == 0)

if (rpfilter_addr_linklocal(&iph->saddr)) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6.flowi6_oif = dev->ifindex;
} else if ((flags & XT_RPFILTER_LOOSE) == 0)
fl6.flowi6_oif = dev->ifindex;

rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
Expand Down
22 changes: 14 additions & 8 deletions net/netfilter/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -1117,24 +1117,28 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
"%s %04X %-11s %7lu%s\n",
"%s %04X %-11s %7u%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
jiffies_delta_to_msecs(cp->timer.expires -
jiffies) / 1000,
pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
" %s %04X %-11s %7lu%s\n",
" %s %04X %-11s %7u%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
(cp->timer.expires-jiffies)/HZ, pe_data);
jiffies_delta_to_msecs(cp->timer.expires -
jiffies) / 1000,
pe_data);
}
return 0;
}
Expand Down Expand Up @@ -1179,26 +1183,28 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
"%s %04X %-11s %-6s %7lu\n",
"%s %04X %-11s %-6s %7u\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
jiffies_delta_to_msecs(cp->timer.expires -
jiffies) / 1000);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
"%s %04X %-11s %-6s %7lu\n",
"%s %04X %-11s %-6s %7u\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
jiffies_delta_to_msecs(cp->timer.expires -
jiffies) / 1000);
}
return 0;
}
Expand Down
15 changes: 11 additions & 4 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1972,13 +1972,20 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */

if (sysctl_expire_nodest_conn(ipvs)) {
__u32 flags = cp->flags;

/* when timer already started, silently drop the packet.*/
if (timer_pending(&cp->timer))
__ip_vs_conn_put(cp);
else
ip_vs_conn_put(cp);

if (sysctl_expire_nodest_conn(ipvs) &&
!(flags & IP_VS_CONN_F_ONE_PACKET)) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
/* don't restart its timer, and silently
drop the packet. */
__ip_vs_conn_put(cp);

return NF_DROP;
}

Expand Down
26 changes: 17 additions & 9 deletions net/netfilter/nf_conntrack_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,21 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[])
#endif
}

/*
 * Dump start hook: on entry cb->data is read as the netlink attribute array.
 *
 * When both CTA_MARK and CTA_MARK_MASK are present, a filter is allocated
 * via ctnetlink_alloc_filter() and stored in cb->data; otherwise cb->data
 * is set to NULL.
 *
 * Returns 0 on success, or the error encoded in the pointer returned by
 * ctnetlink_alloc_filter() (cb->data is left untouched in that case).
 */
static int ctnetlink_start(struct netlink_callback *cb)
{
	const struct nlattr * const *attrs = cb->data;
	struct ctnetlink_filter *mark_filter;

	/* No mark/mask pair supplied: dump without a filter. */
	if (!attrs[CTA_MARK] || !attrs[CTA_MARK_MASK]) {
		cb->data = NULL;
		return 0;
	}

	mark_filter = ctnetlink_alloc_filter(attrs);
	if (IS_ERR(mark_filter))
		return PTR_ERR(mark_filter);

	cb->data = mark_filter;
	return 0;
}

static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
{
struct ctnetlink_filter *filter = data;
Expand Down Expand Up @@ -1290,19 +1305,12 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,

if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.start = ctnetlink_start,
.dump = ctnetlink_dump_table,
.done = ctnetlink_done,
.data = (void *)cda,
};

if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
struct ctnetlink_filter *filter;

filter = ctnetlink_alloc_filter(cda);
if (IS_ERR(filter))
return PTR_ERR(filter);

c.data = filter;
}
return netlink_dump_start(ctnl, skb, nlh, &c);
}

Expand Down
15 changes: 10 additions & 5 deletions net/netfilter/nf_conntrack_proto.c
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,9 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
__nf_ct_l4proto_unregister_one(l4proto);
mutex_unlock(&nf_ct_proto_mutex);

synchronize_rcu();
synchronize_net();
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);

Expand All @@ -333,14 +335,17 @@ static void
nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
unsigned int num_proto)
{
int i;

mutex_lock(&nf_ct_proto_mutex);
while (num_proto-- != 0)
__nf_ct_l4proto_unregister_one(l4proto[num_proto]);
for (i = 0; i < num_proto; i++)
__nf_ct_l4proto_unregister_one(l4proto[i]);
mutex_unlock(&nf_ct_proto_mutex);

synchronize_net();
/* Remove all conntrack entries for this protocol */
nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);

for (i = 0; i < num_proto; i++)
nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
}

static int
Expand Down
38 changes: 25 additions & 13 deletions net/netfilter/nf_tables_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -3354,7 +3354,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
struct nft_set *set;
struct nft_ctx ctx;
char *name;
unsigned int size;
u64 size;
u64 timeout;
u32 ktype, dtype, flags, policy, gc_int, objtype;
struct nft_set_desc desc;
Expand Down Expand Up @@ -5925,18 +5925,15 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
if (event != NETDEV_UNREGISTER)
return 0;

net = maybe_get_net(dev_net(dev));
if (!net)
return 0;

net = dev_net(dev);
mutex_lock(&net->nft.commit_mutex);
list_for_each_entry(table, &net->nft.tables, list) {
list_for_each_entry(flowtable, &table->flowtables, list) {
nft_flowtable_event(event, dev, flowtable);
}
}
mutex_unlock(&net->nft.commit_mutex);
put_net(net);

return NOTIFY_DONE;
}

Expand Down Expand Up @@ -7273,21 +7270,36 @@ static int __init nf_tables_module_init(void)
{
int err;

nft_chain_filter_init();
err = register_pernet_subsys(&nf_tables_net_ops);
if (err < 0)
return err;

err = nft_chain_filter_init();
if (err < 0)
goto err1;

err = nf_tables_core_module_init();
if (err < 0)
return err;
goto err2;

err = nfnetlink_subsys_register(&nf_tables_subsys);
err = register_netdevice_notifier(&nf_tables_flowtable_notifier);
if (err < 0)
goto err;
goto err3;

register_netdevice_notifier(&nf_tables_flowtable_notifier);
/* must be last */
err = nfnetlink_subsys_register(&nf_tables_subsys);
if (err < 0)
goto err4;

return register_pernet_subsys(&nf_tables_net_ops);
err:
return err;
err4:
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
err3:
nf_tables_core_module_exit();
err2:
nft_chain_filter_fini();
err1:
unregister_pernet_subsys(&nf_tables_net_ops);
return err;
}

Expand Down
Loading

0 comments on commit 3fe49d6

Please sign in to comment.