Skip to content

Commit

Permalink
netfilter: flow table support for IPv6
Browse files Browse the repository at this point in the history
This patch adds the IPv6 flow table type, which implements the datapath
flow table for forwarding IPv6 traffic.

This patch also exports ip6_dst_mtu_forward(), which is required to check
the MTU so that packets needing PMTUD handling are passed up to the
classic forwarding path.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  • Loading branch information
Pablo Neira Ayuso committed Jan 8, 2018
1 parent 97add9f commit 0995210
Show file tree
Hide file tree
Showing 5 changed files with 292 additions and 1 deletion.
2 changes: 2 additions & 0 deletions include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,8 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
&inet6_sk(sk)->cork);
}

unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst);

int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
Expand Down
3 changes: 2 additions & 1 deletion net/ipv6/ip6_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}

static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
Expand All @@ -398,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)

return mtu;
}
EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);

static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
Expand Down
8 changes: 8 additions & 0 deletions net/ipv6/netfilter/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,14 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES

config NF_FLOW_TABLE_IPV6
select NF_FLOW_TABLE
tristate "Netfilter flow table IPv6 module"
help
This option adds the flow table IPv6 support.

To compile it as a module, choose M here.

config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
Expand Down
3 changes: 3 additions & 0 deletions net/ipv6/netfilter/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o

# flow table support
obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o

# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
Expand Down
277 changes: 277 additions & 0 deletions net/ipv6/netfilter/nf_flow_table_ipv6.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/neighbour.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_tables.h>
/* For layer 4 checksum field offset. */
#include <linux/tcp.h>
#include <linux/udp.h>

/*
 * Adjust the TCP checksum of @skb after an IPv6 address covered by the
 * pseudo-header was rewritten from @addr to @new_addr.
 *
 * @thoff is the offset of the TCP header from the network header.
 *
 * Returns 0 on success, -1 if the TCP header cannot be pulled into the
 * linear area or cannot be made writable.
 */
static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	unsigned int hdr_end = thoff + sizeof(struct tcphdr);
	struct tcphdr *tcph;

	if (!pskb_may_pull(skb, hdr_end))
		return -1;
	if (skb_try_make_writable(skb, hdr_end))
		return -1;

	/* Look the header up only after the skb has been pulled/uncloned. */
	tcph = (void *)(skb_network_header(skb) + thoff);
	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);

	return 0;
}

/*
 * Adjust the UDP checksum of @skb after an IPv6 address covered by the
 * pseudo-header was rewritten from @addr to @new_addr.
 *
 * @thoff is the offset of the UDP header from the network header.
 *
 * A zero checksum field is left untouched unless the skb is marked
 * CHECKSUM_PARTIAL. If the updated checksum comes out as zero it is
 * stored as CSUM_MANGLED_0 instead.
 *
 * Returns 0 on success, -1 if the UDP header cannot be pulled into the
 * linear area or cannot be made writable.
 */
static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
				struct in6_addr *addr,
				struct in6_addr *new_addr)
{
	unsigned int hdr_end = thoff + sizeof(struct udphdr);
	struct udphdr *udph;

	if (!pskb_may_pull(skb, hdr_end))
		return -1;
	if (skb_try_make_writable(skb, hdr_end))
		return -1;

	/* Look the header up only after the skb has been pulled/uncloned. */
	udph = (void *)(skb_network_header(skb) + thoff);

	/* No checksum present and none pending: nothing to fix up. */
	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
		return 0;

	inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
				  new_addr->s6_addr32, true);
	if (!udph->check)
		udph->check = CSUM_MANGLED_0;

	return 0;
}

/*
 * Fix up the layer 4 checksum of @skb after an IPv6 address was rewritten
 * from @addr to @new_addr. The transport protocol is taken from
 * @ip6h->nexthdr; only TCP and UDP are handled, anything else is left
 * untouched.
 *
 * @thoff is the offset of the transport header from the network header.
 *
 * Returns 0 on success and -1 if the transport header could not be
 * updated.
 *
 * Callers test the result with "< 0", so failure must be reported as a
 * negative value. Returning the NF_DROP verdict here would not work:
 * NF_DROP is 0 and would be indistinguishable from success.
 */
static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
				    unsigned int thoff, struct in6_addr *addr,
				    struct in6_addr *new_addr)
{
	switch (ip6h->nexthdr) {
	case IPPROTO_TCP:
		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
			return -1;
		break;
	case IPPROTO_UDP:
		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
			return -1;
		break;
	default:
		/* No checksum fixup for other protocols. */
		break;
	}

	return 0;
}

static int nf_flow_snat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;

switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = ip6h->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = ip6h->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
ip6h->daddr = new_addr;
break;
default:
return -1;
}

return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
struct sk_buff *skb, struct ipv6hdr *ip6h,
unsigned int thoff,
enum flow_offload_tuple_dir dir)
{
struct in6_addr addr, new_addr;

switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
addr = ip6h->daddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
ip6h->daddr = new_addr;
break;
case FLOW_OFFLOAD_DIR_REPLY:
addr = ip6h->saddr;
new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
ip6h->saddr = new_addr;
break;
default:
return -1;
}

return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
}

/*
 * Apply the NAT transformations recorded in @flow->flags to @skb for a
 * packet travelling in direction @dir: port and address rewriting for
 * FLOW_OFFLOAD_SNAT first, then for FLOW_OFFLOAD_DNAT.
 *
 * The transport header is assumed to directly follow the fixed IPv6
 * header (no extension headers).
 *
 * Returns 0 on success, -1 as soon as any mangling step fails.
 *
 * The IPv6 header pointer is re-read from the skb before every use.
 * The address helpers in this file pull and unclone the skb on their way
 * to the checksum fixup, which can reallocate the skb head and leave a
 * previously fetched header pointer dangling. nf_flow_snat_port() and
 * nf_flow_dnat_port() are defined elsewhere and presumably do the same
 * (NOTE(review): confirm), so the pointer is refreshed after them as well.
 */
static int nf_flow_nat_ipv6(const struct flow_offload *flow,
			    struct sk_buff *skb,
			    enum flow_offload_tuple_dir dir)
{
	const unsigned int thoff = sizeof(struct ipv6hdr);
	struct ipv6hdr *ip6h;

	if (flow->flags & FLOW_OFFLOAD_SNAT) {
		ip6h = ipv6_hdr(skb);
		if (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0)
			return -1;

		/* Port mangling may have moved the header; fetch it again. */
		ip6h = ipv6_hdr(skb);
		if (nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)
			return -1;
	}

	if (flow->flags & FLOW_OFFLOAD_DNAT) {
		/* The SNAT step above may have moved the header as well. */
		ip6h = ipv6_hdr(skb);
		if (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0)
			return -1;

		ip6h = ipv6_hdr(skb);
		if (nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)
			return -1;
	}

	return 0;
}

/*
 * Build the flow table lookup key for @skb received on @dev.
 *
 * Fills @tuple with the IPv6 source/destination addresses, the layer 4
 * ports, the protocol numbers and the input interface index.
 *
 * Returns 0 on success and -1 when the packet is not eligible for the
 * flow table fast path:
 *  - the IPv6 header or the port fields cannot be pulled into the linear
 *    area,
 *  - the next header is neither TCP nor UDP (this also excludes packets
 *    carrying extension headers),
 *  - the hop limit is 0 or 1.
 */
static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
			      struct flow_offload_tuple *tuple)
{
	struct flow_ports *ports;
	struct ipv6hdr *ip6h;
	unsigned int thoff;

	if (!pskb_may_pull(skb, sizeof(*ip6h)))
		return -1;

	ip6h = ipv6_hdr(skb);

	if (ip6h->nexthdr != IPPROTO_TCP &&
	    ip6h->nexthdr != IPPROTO_UDP)
		return -1;

	/*
	 * The hook decrements the hop limit unconditionally before
	 * transmitting. A packet whose hop limit would reach zero (or wrap
	 * around) must not be forwarded from here; reject it so the caller
	 * leaves it to the regular forwarding path.
	 */
	if (ip6h->hop_limit <= 1)
		return -1;

	thoff = sizeof(*ip6h);
	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
		return -1;

	/*
	 * pskb_may_pull() may have reallocated the skb head, so the header
	 * pointer taken above must be refreshed before it is dereferenced.
	 */
	ip6h = ipv6_hdr(skb);
	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);

	tuple->src_v6 = ip6h->saddr;
	tuple->dst_v6 = ip6h->daddr;
	tuple->src_port = ports->source;
	tuple->dst_port = ports->dest;
	tuple->l3proto = AF_INET6;
	tuple->l4proto = ip6h->nexthdr;
	tuple->iifidx = dev->ifindex;

	return 0;
}

/*
 * Modelled on ip_exceeds_mtu().
 *
 * Returns true when @skb is longer than @mtu, unless it is a GSO packet
 * for which skb_gso_validate_mtu() succeeds against @mtu.
 */
static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* Oversized: only acceptable if it is GSO and validates against mtu. */
	return !(skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu));
}

/*
 * Returns true when @skb does not fit the forwarding MTU of route @rt,
 * as reported by ip6_dst_mtu_forward().
 */
static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
{
	u32 fwd_mtu = ip6_dst_mtu_forward(&rt->dst);

	return __nf_flow_exceeds_mtu(skb, fwd_mtu);
}

static unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
struct in6_addr *nexthop;
struct ipv6hdr *ip6h;
struct rt6_info *rt;

if (skb->protocol != htons(ETH_P_IPV6))
return NF_ACCEPT;

if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
return NF_ACCEPT;

tuplehash = flow_offload_lookup(flow_table, &tuple);
if (tuplehash == NULL)
return NF_ACCEPT;

outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
if (!outdev)
return NF_ACCEPT;

dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);

rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
return NF_ACCEPT;

if (skb_try_make_writable(skb, sizeof(*ip6h)))
return NF_DROP;

if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
nf_flow_nat_ipv6(flow, skb, dir) < 0)
return NF_DROP;

flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
ip6h = ipv6_hdr(skb);
ip6h->hop_limit--;

skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);

return NF_STOLEN;
}

/*
 * Flow table type for NFPROTO_IPV6. It pairs the IPv6 datapath hook defined
 * in this file with the rhashtable parameters and garbage collection worker
 * that are defined outside this file.
 */
static struct nf_flowtable_type flowtable_ipv6 = {
	.owner	= THIS_MODULE,
	.family	= NFPROTO_IPV6,
	.hook	= nf_flow_offload_ipv6_hook,
	.gc	= nf_flow_offload_work_gc,
	.params	= &nf_flow_offload_rhash_params,
};

/*
 * Module entry point: registers the IPv6 flow table type through
 * nft_register_flowtable_type(). No result of the registration is
 * checked here; the function always returns 0.
 */
static int __init nf_flow_ipv6_module_init(void)
{
nft_register_flowtable_type(&flowtable_ipv6);

return 0;
}

/*
 * Module exit point: unregisters the IPv6 flow table type that was
 * registered by nf_flow_ipv6_module_init().
 */
static void __exit nf_flow_ipv6_module_exit(void)
{
nft_unregister_flowtable_type(&flowtable_ipv6);
}

module_init(nf_flow_ipv6_module_init);
module_exit(nf_flow_ipv6_module_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);

0 comments on commit 0995210

Please sign in to comment.