Skip to content

Commit

Permalink
netfilter: nf_conntrack: add support for "conntrack zones"
Browse files Browse the repository at this point in the history
Normally, each connection needs a unique identity. Conntrack zones allow
a numerical zone to be specified using the CT target; connections in
different zones can use the same identity.

Example:

iptables -t raw -A PREROUTING -i veth0 -j CT --zone 1
iptables -t raw -A OUTPUT -o veth1 -j CT --zone 1

Signed-off-by: Patrick McHardy <kaber@trash.net>
  • Loading branch information
Patrick McHardy committed Feb 15, 2010
1 parent 8fea97e commit 5d0aa2c
Show file tree
Hide file tree
Showing 25 changed files with 236 additions and 85 deletions.
2 changes: 1 addition & 1 deletion include/linux/netfilter/xt_CT.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

struct xt_ct_target_info {
u_int16_t flags;
u_int16_t __unused;
u_int16_t zone;
u_int32_t ct_events;
u_int32_t exp_events;
char helper[16];
Expand Down
3 changes: 3 additions & 0 deletions include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -352,8 +352,11 @@ enum ip_defrag_users {
IP_DEFRAG_LOCAL_DELIVER,
IP_DEFRAG_CALL_RA_CHAIN,
IP_DEFRAG_CONNTRACK_IN,
__IP_DEFRAG_CONNTRACK_IN_END = IP_DEFRAG_CONNTRACK_IN + USHORT_MAX,
IP_DEFRAG_CONNTRACK_OUT,
__IP_DEFRAG_CONNTRACK_OUT_END = IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
IP_DEFRAG_CONNTRACK_BRIDGE_IN,
__IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
IP_DEFRAG_VS_IN,
IP_DEFRAG_VS_OUT,
IP_DEFRAG_VS_FWD
Expand Down
3 changes: 3 additions & 0 deletions include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,8 +355,11 @@ struct inet_frag_queue;
/*
 * Identifiers for the users of IPv6 defragmentation.
 *
 * Each IP6_DEFRAG_CONNTRACK_* entry is immediately followed by a
 * double-underscore marker set to that entry plus USHORT_MAX, so the
 * next enumerator resumes numbering after that range. The range exists
 * because nf_ct6_defrag_user() returns the base value plus a u16
 * conntrack zone (e.g. IP6_DEFRAG_CONNTRACK_IN + zone).
 */
enum ip6_defrag_users {
IP6_DEFRAG_LOCAL_DELIVER,
IP6_DEFRAG_CONNTRACK_IN,
/* Last value of the per-zone range starting at IP6_DEFRAG_CONNTRACK_IN. */
__IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX,
IP6_DEFRAG_CONNTRACK_OUT,
/* Last value of the per-zone range starting at IP6_DEFRAG_CONNTRACK_OUT. */
__IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
/* Last value of the per-zone range starting at IP6_DEFRAG_CONNTRACK_BRIDGE_IN. */
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
};

struct ip6_create_arg {
Expand Down
5 changes: 3 additions & 2 deletions include/net/netfilter/nf_conntrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int null
extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);

extern struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
__nf_conntrack_find(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple);

extern void nf_conntrack_hash_insert(struct nf_conn *ct);
extern void nf_ct_delete_from_lists(struct nf_conn *ct);
Expand Down Expand Up @@ -267,7 +268,7 @@ extern void
nf_ct_iterate_cleanup(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data);
extern void nf_conntrack_free(struct nf_conn *ct);
extern struct nf_conn *
nf_conntrack_alloc(struct net *net,
nf_conntrack_alloc(struct net *net, u16 zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp);
Expand Down
3 changes: 2 additions & 1 deletion include/net/netfilter/nf_conntrack_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,

/* Find a connection corresponding to a tuple. */
extern struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
nf_conntrack_find_get(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple);

extern int __nf_conntrack_confirm(struct sk_buff *skb);

Expand Down
9 changes: 6 additions & 3 deletions include/net/netfilter/nf_conntrack_expect.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,16 @@ int nf_conntrack_expect_init(struct net *net);
void nf_conntrack_expect_fini(struct net *net);

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple);
__nf_ct_expect_find(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple);

struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple);
nf_ct_expect_find_get(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple);

struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple);
nf_ct_find_expectation(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple);

void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
void nf_ct_remove_expectations(struct nf_conn *ct);
Expand Down
2 changes: 2 additions & 0 deletions include/net/netfilter/nf_conntrack_extend.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ enum nf_ct_ext_id {
NF_CT_EXT_NAT,
NF_CT_EXT_ACCT,
NF_CT_EXT_ECACHE,
NF_CT_EXT_ZONE,
NF_CT_EXT_NUM,
};

#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
#define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone

/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
Expand Down
23 changes: 23 additions & 0 deletions include/net/netfilter/nf_conntrack_zones.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef _NF_CONNTRACK_ZONES_H
#define _NF_CONNTRACK_ZONES_H

#include <net/netfilter/nf_conntrack_extend.h>

#define NF_CT_DEFAULT_ZONE 0

/*
 * Data stored in the NF_CT_EXT_ZONE conntrack extension
 * (NF_CT_EXT_ZONE_TYPE resolves to this struct).
 */
struct nf_conntrack_zone {
/* Numerical zone identifier; read back by nf_ct_zone(). */
u16 id;
};

/*
 * nf_ct_zone - return the conntrack zone id of a connection
 * @ct: conntrack entry to query
 *
 * With CONFIG_NF_CONNTRACK_ZONES enabled, looks up the NF_CT_EXT_ZONE
 * extension of @ct and returns its id. Returns NF_CT_DEFAULT_ZONE when
 * the extension is not found, or unconditionally when zone support is
 * compiled out.
 */
static inline u16 nf_ct_zone(const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_ZONES
	const struct nf_conntrack_zone *ext = nf_ct_ext_find(ct, NF_CT_EXT_ZONE);

	return ext ? ext->id : NF_CT_DEFAULT_ZONE;
#else
	return NF_CT_DEFAULT_ZONE;
#endif
}

#endif /* _NF_CONNTRACK_ZONES_H */
3 changes: 2 additions & 1 deletion net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
Expand Down Expand Up @@ -266,7 +267,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -EINVAL;
}

h = nf_conntrack_find_get(sock_net(sk), &tuple);
h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
Expand Down
8 changes: 5 additions & 3 deletions net/ipv4/netfilter/nf_conntrack_proto_icmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_log.h>

static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
Expand Down Expand Up @@ -114,13 +115,14 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,

/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
icmp_error_message(struct net *net, struct sk_buff *skb,
icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_l4proto *innerproto;
const struct nf_conntrack_tuple_hash *h;
u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;

NF_CT_ASSERT(skb->nfct == NULL);

Expand All @@ -146,7 +148,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb,

*ctinfo = IP_CT_RELATED;

h = nf_conntrack_find_get(net, &innertuple);
h = nf_conntrack_find_get(net, zone, &innertuple);
if (!h) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
Expand Down Expand Up @@ -209,7 +211,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;

return icmp_error_message(net, skb, ctinfo, hooknum);
return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
}

#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
Expand Down
12 changes: 9 additions & 3 deletions net/ipv4/netfilter/nf_defrag_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
#include <net/netfilter/nf_conntrack.h>

Expand All @@ -39,15 +40,20 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
u16 zone = NF_CT_DEFAULT_ZONE;

if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);

#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge &&
skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
return IP_DEFRAG_CONNTRACK_BRIDGE_IN;
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
#endif
if (hooknum == NF_INET_PRE_ROUTING)
return IP_DEFRAG_CONNTRACK_IN;
return IP_DEFRAG_CONNTRACK_IN + zone;
else
return IP_DEFRAG_CONNTRACK_OUT;
return IP_DEFRAG_CONNTRACK_OUT + zone;
}

static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
Expand Down
24 changes: 14 additions & 10 deletions net/ipv4/netfilter/nf_nat_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_zones.h>

static DEFINE_SPINLOCK(nf_nat_lock);

Expand Down Expand Up @@ -69,13 +70,14 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);

/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
hash_by_src(const struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;

/* Original src, to ensure we map it consistently if poss. */
hash = jhash_3words((__force u32)tuple->src.u3.ip,
(__force u32)tuple->src.u.all,
(__force u32)tuple->src.u.all ^ zone,
tuple->dst.protonum, 0);
return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
}
Expand Down Expand Up @@ -139,20 +141,20 @@ same_src(const struct nf_conn *ct,

/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
find_appropriate_src(struct net *net, u16 zone,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
unsigned int h = hash_by_src(net, tuple);
unsigned int h = hash_by_src(net, zone, tuple);
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
const struct hlist_node *n;

rcu_read_lock();
hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
ct = nat->ct;
if (same_src(ct, tuple)) {
if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
Expand All @@ -175,7 +177,7 @@ find_appropriate_src(struct net *net,
the ip with the lowest src-ip/dst-ip/proto usage.
*/
static void
find_best_ips_proto(struct nf_conntrack_tuple *tuple,
find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
Expand Down Expand Up @@ -209,7 +211,7 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
maxip = ntohl(range->max_ip);
j = jhash_2words((__force u32)tuple->src.u3.ip,
range->flags & IP_NAT_RANGE_PERSISTENT ?
0 : (__force u32)tuple->dst.u3.ip, 0);
0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
j = ((u64)j * (maxip - minip + 1)) >> 32;
*var_ipp = htonl(minip + j);
}
Expand All @@ -229,6 +231,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
{
struct net *net = nf_ct_net(ct);
const struct nf_nat_protocol *proto;
u16 zone = nf_ct_zone(ct);

/* 1) If this srcip/proto/src-proto-part is currently mapped,
and that same mapping gives a unique tuple within the given
Expand All @@ -239,7 +242,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
manips not an issue. */
if (maniptype == IP_NAT_MANIP_SRC &&
!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
if (find_appropriate_src(net, orig_tuple, tuple, range)) {
if (find_appropriate_src(net, zone, orig_tuple, tuple, range)) {
pr_debug("get_unique_tuple: Found current src map\n");
if (!nf_nat_used_tuple(tuple, ct))
return;
Expand All @@ -249,7 +252,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
/* 2) Select the least-used IP/proto combination in the given
range. */
*tuple = *orig_tuple;
find_best_ips_proto(tuple, range, ct, maniptype);
find_best_ips_proto(zone, tuple, range, ct, maniptype);

/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
Expand Down Expand Up @@ -327,7 +330,8 @@ nf_nat_setup_info(struct nf_conn *ct,
if (have_to_hash) {
unsigned int srchash;

srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
srchash = hash_by_src(net, nf_ct_zone(ct),
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_lock);
/* nf_conntrack_alter_reply might re-allocate extension area */
nat = nfct_nat(ct);
Expand Down
3 changes: 2 additions & 1 deletion net/ipv4/netfilter/nf_nat_pptp.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>

Expand Down Expand Up @@ -74,7 +75,7 @@ static void pptp_nat_expected(struct nf_conn *ct,

pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple_ip(&t);
other_exp = nf_ct_expect_find_get(net, &t);
other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
if (other_exp) {
nf_ct_unexpect_related(other_exp);
nf_ct_expect_put(other_exp);
Expand Down
12 changes: 9 additions & 3 deletions net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
#include <net/netfilter/nf_log.h>

Expand Down Expand Up @@ -191,15 +192,20 @@ static unsigned int ipv6_confirm(unsigned int hooknum,
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
u16 zone = NF_CT_DEFAULT_ZONE;

if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);

#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge &&
skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN;
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
#endif
if (hooknum == NF_INET_PRE_ROUTING)
return IP6_DEFRAG_CONNTRACK_IN;
return IP6_DEFRAG_CONNTRACK_IN + zone;
else
return IP6_DEFRAG_CONNTRACK_OUT;
return IP6_DEFRAG_CONNTRACK_OUT + zone;

}

Expand Down
Loading

0 comments on commit 5d0aa2c

Please sign in to comment.