Skip to content

Commit

Permalink
netfilter: xtables: don't hook tables by default
Browse files Browse the repository at this point in the history
delay hook registration until the table is being requested inside a
namespace.

Historically, a particular table (iptables mangle, ip6tables filter, etc)
was registered on module load.

When netns support was added to iptables only the ip/ip6tables ruleset was
made namespace aware, not the actual hook points.

This means f.e. that when ipt_filter table/module is loaded on a system,
then each namespace on that system has an (empty) iptables filter ruleset.

In other words, if a namespace sends a packet, such skb is 'caught' by
netfilter machinery and fed to hooking points for that table (i.e. INPUT,
FORWARD, etc).

Thanks to Eric Biederman, hooks are no longer global, but per namespace.

This means that we can avoid allocation of empty ruleset in a namespace and
defer hook registration until we need the functionality.

We register a table's hook entry points ONLY in the initial namespace.
When an iptables get/setsockopt is issued inside a given namespace, we check
if the table is found in the per-namespace list.

If not, we attempt to find it in the initial namespace, and, if found,
create an empty default table in the requesting namespace and register the
needed hooks.

Hook points are destroyed only once the namespace is deleted; there is no
'usage count' (it makes no sense since there is no 'remove table' operation
in the xtables api).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  • Loading branch information
Florian Westphal authored and Pablo Neira Ayuso committed Mar 2, 2016
1 parent a67dd26 commit b9e69e1
Show file tree
Hide file tree
Showing 16 changed files with 361 additions and 208 deletions.
6 changes: 4 additions & 2 deletions include/linux/netfilter/x_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@ struct xt_table {
u_int8_t af; /* address/protocol family */
int priority; /* hook order */

/* called when table is needed in the given netns */
int (*table_init)(struct net *net);

/* A unique name... */
const char name[XT_TABLE_MAXNAMELEN];
};
Expand Down Expand Up @@ -408,8 +411,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
return cnt;
}

struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);

#ifdef CONFIG_COMPAT
#include <net/compat.h>
Expand Down
41 changes: 27 additions & 14 deletions net/ipv4/netfilter/arp_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -1780,6 +1780,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}

/* Tear down @table itself: detach it with xt_unregister_table(), run
 * cleanup_entry() on every rule and free the table info.
 * Netfilter hooks are not touched here; callers handle hook
 * registration/unregistration around this helper.
 */
static void __arpt_unregister_table(struct xt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
/* NOTE(review): table->me is sampled before xt_unregister_table();
 * presumably @table must not be dereferenced afterwards -- confirm.
 */
struct module *table_owner = table->me;
struct arpt_entry *iter;

private = xt_unregister_table(table);

/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter);
/* only drop the owner reference when the table holds more than its
 * initial entries
 */
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
}

int arpt_register_table(struct net *net,
const struct xt_table *table,
const struct arpt_replace *repl,
Expand Down Expand Up @@ -1810,8 +1828,15 @@ int arpt_register_table(struct net *net,
goto out_free;
}

/* set res now, will see skbs right after nf_register_net_hooks */
WRITE_ONCE(*res, new_table);

ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
if (ret != 0) {
__arpt_unregister_table(new_table);
*res = NULL;
}

return ret;

out_free:
Expand All @@ -1822,20 +1847,8 @@ int arpt_register_table(struct net *net,
void arpt_unregister_table(struct net *net, struct xt_table *table,
const struct nf_hook_ops *ops)
{
struct xt_table_info *private;
void *loc_cpu_entry;
struct module *table_owner = table->me;
struct arpt_entry *iter;

private = xt_unregister_table(table);

/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter);
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
__arpt_unregister_table(table);
}

/* The built-in targets: standard (NULL) and error. */
Expand Down
29 changes: 17 additions & 12 deletions net/ipv4/netfilter/arptable_filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
(1 << NF_ARP_FORWARD))

static int __net_init arptable_filter_table_init(struct net *net);

/* Description of the ARP "filter" table.  table_init is the callback
 * invoked when the table is needed in a given network namespace.
 */
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_ARP,
.priority = NF_IP_PRI_FILTER,
.table_init = arptable_filter_table_init,
};

/* The work comes in here from netfilter.c */
Expand All @@ -35,11 +38,14 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,

static struct nf_hook_ops *arpfilter_ops __read_mostly;

static int __net_init arptable_filter_net_init(struct net *net)
static int __net_init arptable_filter_table_init(struct net *net)
{
struct arpt_replace *repl;
int err;

if (net->ipv4.arptable_filter)
return 0;

repl = arpt_alloc_initial_table(&packet_filter);
if (repl == NULL)
return -ENOMEM;
Expand All @@ -51,38 +57,37 @@ static int __net_init arptable_filter_net_init(struct net *net)

/* Per-netns exit handler: unregister the ARP filter table of @net, if one
 * was ever set up there.
 */
static void __net_exit arptable_filter_net_exit(struct net *net)
{
/* no table in this namespace, nothing to undo */
if (!net->ipv4.arptable_filter)
return;
arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
/* reset the pointer so the table is no longer seen as present */
net->ipv4.arptable_filter = NULL;
}

static struct pernet_operations arptable_filter_net_ops = {
.init = arptable_filter_net_init,
.exit = arptable_filter_net_exit,
};

static int __init arptable_filter_init(void)
{
int ret;

arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
if (IS_ERR(arpfilter_ops))
return PTR_ERR(arpfilter_ops);

ret = register_pernet_subsys(&arptable_filter_net_ops);
if (ret < 0)
if (ret < 0) {
kfree(arpfilter_ops);
return ret;

arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
if (IS_ERR(arpfilter_ops)) {
ret = PTR_ERR(arpfilter_ops);
goto cleanup_table;
}
return ret;

cleanup_table:
unregister_pernet_subsys(&arptable_filter_net_ops);
return ret;
}

static void __exit arptable_filter_fini(void)
{
xt_hook_unlink(&packet_filter, arpfilter_ops);
unregister_pernet_subsys(&arptable_filter_net_ops);
kfree(arpfilter_ops);
}

module_init(arptable_filter_init);
Expand Down
42 changes: 28 additions & 14 deletions net/ipv4/netfilter/ip_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -2062,6 +2062,24 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}

/* Tear down @table itself: detach it with xt_unregister_table(), run
 * cleanup_entry() on every rule (passing @net along) and free the table
 * info.  Netfilter hooks are not touched here; callers handle hook
 * registration/unregistration around this helper.
 */
static void __ipt_unregister_table(struct net *net, struct xt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
/* NOTE(review): table->me is sampled before xt_unregister_table();
 * presumably @table must not be dereferenced afterwards -- confirm.
 */
struct module *table_owner = table->me;
struct ipt_entry *iter;

private = xt_unregister_table(table);

/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter, net);
/* only drop the owner reference when the table holds more than its
 * initial entries
 */
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
}

int ipt_register_table(struct net *net, const struct xt_table *table,
const struct ipt_replace *repl,
const struct nf_hook_ops *ops, struct xt_table **res)
Expand Down Expand Up @@ -2089,7 +2107,15 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
goto out_free;
}

/* set res now, will see skbs right after nf_register_net_hooks */
WRITE_ONCE(*res, new_table);

ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
if (ret != 0) {
__ipt_unregister_table(net, new_table);
*res = NULL;
}

return ret;

out_free:
Expand All @@ -2100,20 +2126,8 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
void ipt_unregister_table(struct net *net, struct xt_table *table,
const struct nf_hook_ops *ops)
{
struct xt_table_info *private;
void *loc_cpu_entry;
struct module *table_owner = table->me;
struct ipt_entry *iter;

private = xt_unregister_table(table);

/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries;
xt_entry_foreach(iter, loc_cpu_entry, private->size)
cleanup_entry(iter, net);
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
__ipt_unregister_table(net, table);
}

/* Returns 1 if the type and code is matched by the range, 0 otherwise */
Expand Down
35 changes: 24 additions & 11 deletions net/ipv4/netfilter/iptable_filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,15 @@ MODULE_DESCRIPTION("iptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
static int __net_init iptable_filter_table_init(struct net *net);

/* Description of the IPv4 "filter" table.  table_init is the callback
 * invoked when the table is needed in a given network namespace.
 */
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_FILTER,
.table_init = iptable_filter_table_init,
};

static unsigned int
Expand All @@ -48,14 +50,17 @@ iptable_filter_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *filter_ops __read_mostly;

/* Default to forward because I got too much mail already. */
static bool forward = true;
static bool forward __read_mostly = true;
module_param(forward, bool, 0000);

static int __net_init iptable_filter_net_init(struct net *net)
static int __net_init iptable_filter_table_init(struct net *net)
{
struct ipt_replace *repl;
int err;

if (net->ipv4.iptable_filter)
return 0;

repl = ipt_alloc_initial_table(&packet_filter);
if (repl == NULL)
return -ENOMEM;
Expand All @@ -69,9 +74,20 @@ static int __net_init iptable_filter_net_init(struct net *net)
return err;
}

/* Per-netns init handler.  The filter table is set up immediately only for
 * the initial namespace or when the "forward" module parameter is false;
 * in every other case nothing is created and 0 is returned.
 * NOTE(review): presumably !forward requires the table to exist from the
 * start so that a non-default FORWARD policy takes effect -- confirm.
 */
static int __net_init iptable_filter_net_init(struct net *net)
{
if (net == &init_net || !forward)
return iptable_filter_table_init(net);

return 0;
}

/* Per-netns exit handler: unregister the IPv4 filter table of @net, if one
 * was ever set up there.
 */
static void __net_exit iptable_filter_net_exit(struct net *net)
{
/* no table in this namespace, nothing to undo */
if (!net->ipv4.iptable_filter)
return;
ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
/* reset the pointer so the table is no longer seen as present */
net->ipv4.iptable_filter = NULL;
}

static struct pernet_operations iptable_filter_net_ops = {
Expand All @@ -83,24 +99,21 @@ static int __init iptable_filter_init(void)
{
int ret;

filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
if (IS_ERR(filter_ops))
return PTR_ERR(filter_ops);

ret = register_pernet_subsys(&iptable_filter_net_ops);
if (ret < 0)
return ret;

/* Register hooks */
filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
if (IS_ERR(filter_ops)) {
ret = PTR_ERR(filter_ops);
unregister_pernet_subsys(&iptable_filter_net_ops);
}
kfree(filter_ops);

return ret;
}

static void __exit iptable_filter_fini(void)
{
xt_hook_unlink(&packet_filter, filter_ops);
unregister_pernet_subsys(&iptable_filter_net_ops);
kfree(filter_ops);
}

module_init(iptable_filter_init);
Expand Down
Loading

0 comments on commit b9e69e1

Please sign in to comment.