Skip to content

Commit

Permalink
ipvs: Complete IPv6 fragment handling for IPVS
Browse files Browse the repository at this point in the history
IPVS now supports fragmented packets, with support from nf_conntrack_reasm.c

Based on patch from: Hans Schillstrom.

IPVS does what conntrack does, i.e. it uses skb->nfct_reasm
(i.e. when all fragments are collected, nf_ct_frag6_output()
starts a "re-play" of all fragments into the interrupted
PREROUTING chain at prio -399 (NF_IP6_PRI_CONNTRACK_DEFRAG+1)
with nfct_reasm pointing to the assembled packet.)

Notice, module nf_defrag_ipv6 must be loaded for this to work.
Report unhandled fragments, and recommend user to load nf_defrag_ipv6.

To handle fw-mark for fragments, add a new IPVS hook into the PREROUTING
chain at prio -99 (NF_IP6_PRI_NAT_DST+1) to catch fragments, and copy
the fw-mark info from the first packet with an upper layer header.

IPv6 fragment handling should be the last item on the list of
missing IPVS IPv6 support.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
  • Loading branch information
Jesper Dangaard Brouer authored and Simon Horman committed Sep 28, 2012
1 parent 63dca2c commit 2f74713
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 36 deletions.
39 changes: 38 additions & 1 deletion include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,43 @@ extern int ip_vs_conn_tab_size;
/*
 * Address-family independent summary of a packet's IP header.
 * Populated by ip_vs_fill_iph_skb() (and ip_vs_fill_ip4hdr() for IPv4).
 */
struct ip_vs_iphdr {
__u32 len; /* IPv4: simply where L4 starts.
* IPv6: where the L4 transport header starts
* (set by ipv6_find_hdr()).
*/
__u32 thoff_reasm; /* Transport header offset in the nfct_reasm skb;
* 0 unless a reassembled skb is attached.
*/
__u16 fragoffs; /* IPv6 fragment offset, 0 if first frag (or not frag) */
__s16 protocol; /* IPv6: value returned by ipv6_find_hdr() */
__s32 flags; /* IPv6: flags reported by ipv6_find_hdr(),
* e.g. IP6T_FH_F_FRAG
*/
union nf_inet_addr saddr; /* Source address copied from the IP header */
union nf_inet_addr daddr; /* Destination address copied from the IP header */
};

/* Dependency to module: nf_defrag_ipv6 */
#if defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
/* Return the skb referenced by skb->nfct_reasm.
 * Callers test the result for NULL before using it, so a fragment may have
 * no reassembled skb attached.
 */
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
{
return skb->nfct_reasm;
}
/* Fragment-aware wrapper around skb_header_pointer().
 *
 * @skb:    packet being processed
 * @offset: offset of the wanted header inside @skb
 * @len:    number of bytes wanted
 * @buffer: scratch area handed through to skb_header_pointer()
 * @ipvsh:  header summary previously filled in for @skb
 *
 * For a non-first fragment (fragoffs != 0) that carries a reassembled skb,
 * the data is read from the reassembled skb at ipvsh->thoff_reasm instead
 * of from @skb at @offset.  In every other case this behaves exactly like
 * skb_header_pointer(skb, offset, len, buffer).
 *
 * Returns whatever skb_header_pointer() returns.
 */
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
				      int len, void *buffer,
				      const struct ip_vs_iphdr *ipvsh)
{
	const struct sk_buff *src = skb;
	int off = offset;

	if (unlikely(ipvsh->fragoffs && skb_nfct_reasm(skb))) {
		/* Later fragment: the L4 header only exists in the
		 * reassembled packet.
		 */
		src = skb_nfct_reasm(skb);
		off = ipvsh->thoff_reasm;
	}

	return skb_header_pointer(src, off, len, buffer);
}
#else
/* Stub for builds without CONFIG_NF_DEFRAG_IPV6{,_MODULE}: always returns
 * NULL, i.e. no reassembled skb is ever reported.
 */
static inline struct sk_buff *skb_nfct_reasm(const struct sk_buff *skb)
{
return NULL;
}
/* Variant for builds without CONFIG_NF_DEFRAG_IPV6{,_MODULE}: a plain
 * pass-through to skb_header_pointer().  @ipvsh is accepted only to keep
 * the signature identical to the fragment-aware variant and is not used.
 */
static inline void *frag_safe_skb_hp(const struct sk_buff *skb, int offset,
int len, void *buffer,
const struct ip_vs_iphdr *ipvsh)
{
return skb_header_pointer(skb, offset, len, buffer);
}
#endif

static inline void
ip_vs_fill_ip4hdr(const void *nh, struct ip_vs_iphdr *iphdr)
{
Expand All @@ -141,12 +171,19 @@ ip_vs_fill_iph_skb(int af, const struct sk_buff *skb, struct ip_vs_iphdr *iphdr)
(struct ipv6hdr *)skb_network_header(skb);
iphdr->saddr.in6 = iph->saddr;
iphdr->daddr.in6 = iph->daddr;
/* ipv6_find_hdr() updates len, flags */
/* ipv6_find_hdr() updates len, flags, thoff_reasm */
iphdr->thoff_reasm = 0;
iphdr->len = 0;
iphdr->flags = 0;
iphdr->protocol = ipv6_find_hdr(skb, &iphdr->len, -1,
&iphdr->fragoffs,
&iphdr->flags);
/* get proto from the re-assembled packet, and its offset */
if (skb_nfct_reasm(skb))
iphdr->protocol = ipv6_find_hdr(skb_nfct_reasm(skb),
&iphdr->thoff_reasm,
-1, NULL, NULL);

} else
#endif
{
Expand Down
6 changes: 2 additions & 4 deletions net/netfilter/ipvs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,9 @@ config IP_VS_IPV6
depends on IPV6 = y || IP_VS = IPV6
select IP6_NF_IPTABLES
---help---
Add IPv6 support to IPVS. This is incomplete and might be dangerous.
Add IPv6 support to IPVS.

See http://www.mindbasket.com/ipvs for more information.

Say N if unsure.
Say Y if unsure.

config IP_VS_DEBUG
bool "IP virtual server debugging"
Expand Down
2 changes: 1 addition & 1 deletion net/netfilter/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
__be16 _ports[2], *pptr;
struct net *net = skb_net(skb);

pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
pptr = frag_safe_skb_hp(skb, proto_off, sizeof(_ports), _ports, iph);
if (pptr == NULL)
return 1;

Expand Down
117 changes: 96 additions & 21 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,12 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
unsigned int flags;

*ignored = 1;

/*
* IPv6 frags, only the first hit here.
*/
ip_vs_fill_iph_skb(svc->af, skb, &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
pptr = frag_safe_skb_hp(skb, iph.len, sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NULL;

Expand Down Expand Up @@ -507,8 +511,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
#endif

ip_vs_fill_iph_skb(svc->af, skb, &iph);

pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
pptr = frag_safe_skb_hp(skb, iph.len, sizeof(_ports), _ports, &iph);
if (pptr == NULL) {
ip_vs_service_put(svc);
return NF_DROP;
Expand Down Expand Up @@ -654,14 +657,6 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}

#ifdef CONFIG_IP_VS_IPV6
/* Placeholder for IPv6 fragment gathering: performs no work and always
 * returns 0.  Both parameters are unused.
 * NOTE(review): presumably the IPv6 counterpart of ip_vs_gather_frags() —
 * confirm against callers.
 */
static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
{
/* TODO IPv6: Find out what to do here for IPv6 */
return 0;
}
#endif

static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_IPV6
Expand Down Expand Up @@ -939,8 +934,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ip_vs_fill_iph_skb(AF_INET6, skb, ipvsh);

*related = 1;

ic = skb_header_pointer(skb, ipvsh->len, sizeof(_icmph), &_icmph);
ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
if (ic == NULL)
return NF_DROP;

Expand All @@ -955,6 +949,11 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
*related = 0;
return NF_ACCEPT;
}
/* Fragment header that is before ICMP header tells us that:
* it's not an error message since they can't be fragmented.
*/
if (ipvsh->flags & IP6T_FH_F_FRAG)
return NF_DROP;

IP_VS_DBG(8, "Outgoing ICMPv6 (%d,%d) %pI6c->%pI6c\n",
ic->icmp6_type, ntohs(icmpv6_id(ic)),
Expand Down Expand Up @@ -1117,14 +1116,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iph_skb(af, skb, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (!iph.fragoffs && skb_nfct_reasm(skb)) {
struct sk_buff *reasm = skb_nfct_reasm(skb);
/* Save fw mark for coming frags */
reasm->ipvs_property = 1;
reasm->mark = skb->mark;
}
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
int verdict = ip_vs_out_icmp_v6(skb, &related,
hooknum);

if (related)
return verdict;
ip_vs_fill_iph_skb(af, skb, &iph);
}
} else
#endif
Expand All @@ -1134,7 +1138,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)

if (related)
return verdict;
ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
}

pd = ip_vs_proto_data_get(net, iph.protocol);
Expand Down Expand Up @@ -1167,8 +1170,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;

pptr = skb_header_pointer(skb, iph.len,
sizeof(_ports), _ports);
pptr = frag_safe_skb_hp(skb, iph.len,
sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
if (ip_vs_lookup_real_service(net, af, iph.protocol,
Expand Down Expand Up @@ -1468,7 +1471,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)

*related = 1;

ic = skb_header_pointer(skb, iph->len, sizeof(_icmph), &_icmph);
ic = frag_safe_skb_hp(skb, iph->len, sizeof(_icmph), &_icmph, iph);
if (ic == NULL)
return NF_DROP;

Expand All @@ -1483,6 +1486,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
*related = 0;
return NF_ACCEPT;
}
/* Fragment header that is before ICMP header tells us that:
* it's not an error message since they can't be fragmented.
*/
if (iph->flags & IP6T_FH_F_FRAG)
return NF_DROP;

IP_VS_DBG(8, "Incoming ICMPv6 (%d,%d) %pI6c->%pI6c\n",
ic->icmp6_type, ntohs(icmpv6_id(ic)),
Expand Down Expand Up @@ -1514,10 +1522,20 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
"Checking incoming ICMPv6 for");

/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_in_get(AF_INET6, skb, &ciph, ciph.len, 1);
/* The embedded headers contain source and dest in reverse order
* if not from localhost
*/
cp = pp->conn_in_get(AF_INET6, skb, &ciph, ciph.len,
(hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);

if (!cp)
return NF_ACCEPT;
/* VS/TUN, VS/DR and LOCALNODE just let it go */
if ((hooknum == NF_INET_LOCAL_OUT) &&
(IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
__ip_vs_conn_put(cp);
return NF_ACCEPT;
}

/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
Expand Down Expand Up @@ -1590,6 +1608,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)

#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (!iph.fragoffs && skb_nfct_reasm(skb)) {
struct sk_buff *reasm = skb_nfct_reasm(skb);
/* Save fw mark for coming frags. */
reasm->ipvs_property = 1;
reasm->mark = skb->mark;
}
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
Expand All @@ -1614,13 +1638,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
* Only sched first IPv6 fragment.
*/
cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);

if (unlikely(!cp) && !iph.fragoffs) {
/* No (second) fragments need to enter here, as nf_defrag_ipv6
* replayed fragment zero will already have created the cp
*/
int v;

/* Schedule and create new connection entry into &cp */
if (!pp->conn_schedule(af, skb, pd, &v, &cp))
return v;
}
Expand All @@ -1629,6 +1656,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, af, pp, skb, 0,
"ip_vs_in: packet continues traversal as normal");
if (iph.fragoffs && !skb_nfct_reasm(skb)) {
/* A fragment that couldn't be mapped to a conn entry
* and doesn't have any pointer to a reasm skb
* means that module nf_defrag_ipv6 is missing
*/
IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
}
return NF_ACCEPT;
}

Expand Down Expand Up @@ -1712,6 +1747,38 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,

#ifdef CONFIG_IP_VS_IPV6

/*
 *	AF_INET6 fragment handling in the PRE_ROUTING chain.
 *
 *	Propagates the firewall mark that was stored on the reassembled skb
 *	(by ip_vs_in()/ip_vs_out() when they saw the first fragment) to the
 *	fragment currently traversing the hook.
 *
 *	Always returns NF_ACCEPT; the only side effect is the update of
 *	skb->mark.  hooknum, in, out and okfn are not used.
 */
static unsigned int
ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
		     const struct net_device *in,
		     const struct net_device *out,
		     int (*okfn)(struct sk_buff *))
{
	struct sk_buff *reasm_skb = skb_nfct_reasm(skb);

	/* Not a "replay" from nf_ct_frag6_output: nothing to copy. */
	if (!reasm_skb)
		return NF_ACCEPT;

	IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm_skb->ipvs_property);

	/* ipvs_property is set when the first fragment is checked in
	 * ip_vs_in() and ip_vs_out(); without it no mark was saved.
	 */
	if (!reasm_skb->ipvs_property)
		return NF_ACCEPT;

	/* IPVS is not enabled in this netns: leave the packet alone. */
	if (!net_ipvs(skb_net(skb))->enable)
		return NF_ACCEPT;

	/* Copy stored fw mark, saved in ip_vs_{in,out} */
	skb->mark = reasm_skb->mark;

	return NF_ACCEPT;
}

/*
* AF_INET6 handler in NF_INET_LOCAL_IN chain
* Schedule and forward packets from remote clients
Expand Down Expand Up @@ -1851,6 +1918,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.priority = 100,
},
#ifdef CONFIG_IP_VS_IPV6
/* After mangle & nat fetch 2:nd fragment and following */
{
.hook = ip_vs_preroute_frag6,
.owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST + 1,
},
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
Expand Down
Loading

0 comments on commit 2f74713

Please sign in to comment.