Skip to content

Commit

Permalink
ipvs: netfilter connection tracking changes
Browse files Browse the repository at this point in the history
	Add more code to IPVS to work with Netfilter connection
tracking and fix some problems.

- Allow IPVS to be compiled without connection tracking as in
2.6.35 and before. This can avoid keeping conntracks for all
IPVS connections because this costs memory. ip_vs_ftp still
depends on connection tracking and NAT as implemented for 2.6.36.

- Add sysctl var "conntrack" to enable connection tracking for
all IPVS connections. For loaded IPVS directors it needs
tuning of nf_conntrack_max limit.

- Add IP_VS_CONN_F_NFCT connection flag to request the connection
to use connection tracking. This allows user space to provide this
flag, for example, in dest->conn_flags. This can be useful to
request connection tracking per real server instead of forcing it
for all connections with the "conntrack" sysctl. This flag is
set currently only by ip_vs_ftp and of course by "conntrack" sysctl.

- Add ip_vs_nfct.c file to hold all connection tracking code,
by this way main code should not depend of netfilter conntrack
support.

- Return back the ip_vs_post_routing handler as in 2.6.35 and use
skb->ipvs_property=1 to allow IPVS to work without connection
tracking

Connection tracking:

- most of the code is already in 2.6.36-rc

- alter conntrack reply tuple for LVS-NAT connections when first packet
from client is forwarded and conntrack state is NEW or RELATED.
Additionally, alter reply for RELATED connections from real server,
again for packet in original direction.

- add IP_VS_XMIT_TUNNEL to confirm conntrack (without altering
reply) for LVS-TUN early because we want to call nf_reset. It is
needed because we add IPIP header and the original conntrack
should be preserved, not destroyed. The transmitted IPIP packets
can reuse same conntrack, so we do not set skb->ipvs_property.

- try to destroy conntrack when the IPVS connection is destroyed.
It is not fatal if conntrack disappears before that, it depends
on the used timers.

Fix problems from long time:

- add skb->ip_summed = CHECKSUM_NONE for the LVS-TUN transmitters

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Patrick McHardy <kaber@trash.net>
  • Loading branch information
Julian Anastasov authored and Patrick McHardy committed Sep 21, 2010
1 parent 3575792 commit f4bc17c
Show file tree
Hide file tree
Showing 10 changed files with 475 additions and 196 deletions.
2 changes: 2 additions & 0 deletions include/linux/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,12 @@
#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */

/* Flags that are not sent to backup server start from bit 16 */
#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */

/* Connection flags from destination that can be changed by user space */
#define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \
IP_VS_CONN_F_ONE_PACKET | \
IP_VS_CONN_F_NFCT | \
0)

#define IP_VS_SCHEDNAME_MAXLEN 16
Expand Down
44 changes: 43 additions & 1 deletion include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
#include <linux/ip.h>
#include <linux/ipv6.h> /* for struct ipv6hdr */
#include <net/ipv6.h> /* for ipv6_addr_copy */

#ifdef CONFIG_IP_VS_NFCT
#include <net/netfilter/nf_conntrack.h>
#endif

/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
Expand Down Expand Up @@ -798,6 +800,7 @@ extern int sysctl_ip_vs_expire_nodest_conn;
extern int sysctl_ip_vs_expire_quiescent_template;
extern int sysctl_ip_vs_sync_threshold[2];
extern int sysctl_ip_vs_nat_icmp_send;
extern int sysctl_ip_vs_conntrack;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];

Expand Down Expand Up @@ -955,8 +958,47 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum)
return csum_partial(diff, sizeof(diff), oldsum);
}

#ifdef CONFIG_IP_VS_NFCT
/*
* Netfilter connection tracking
* (from ip_vs_nfct.c)
*/
static inline int ip_vs_conntrack_enabled(void)
{
return sysctl_ip_vs_conntrack;
}

extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
int outin);
extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp);
extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
struct ip_vs_conn *cp, u_int8_t proto,
const __be16 port, int from_rs);
extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);

#else

static inline int ip_vs_conntrack_enabled(void)
{
return 0;
}

static inline void ip_vs_update_conntrack(struct sk_buff *skb,
struct ip_vs_conn *cp, int outin)
{
}

static inline int ip_vs_confirm_conntrack(struct sk_buff *skb,
struct ip_vs_conn *cp)
{
return NF_ACCEPT;
}

static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
{
}
/* CONFIG_IP_VS_NFCT */
#endif

#endif /* __KERNEL__ */

Expand Down
13 changes: 11 additions & 2 deletions net/netfilter/ipvs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
menuconfig IP_VS
tristate "IP virtual server support"
depends on NET && INET && NETFILTER && NF_CONNTRACK
depends on NET && INET && NETFILTER
---help---
IP Virtual Server support will let you build a high-performance
virtual server based on cluster of two or more real servers. This
Expand Down Expand Up @@ -235,7 +235,8 @@ comment 'IPVS application helper'

config IP_VS_FTP
tristate "FTP protocol helper"
depends on IP_VS_PROTO_TCP && NF_NAT
depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
select IP_VS_NFCT
---help---
FTP is a protocol that transfers IP address and/or port number in
the payload. In the virtual server via Network Address Translation,
Expand All @@ -247,4 +248,12 @@ config IP_VS_FTP
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.

config IP_VS_NFCT
bool "Netfilter connection tracking"
depends on NF_CONNTRACK
---help---
The Netfilter connection tracking support allows the IPVS
connection state to be exported to the Netfilter framework
for filtering purposes.

endif # IP_VS
5 changes: 4 additions & 1 deletion net/netfilter/ipvs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o

ip_vs-extra_objs-y :=
ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o

ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
ip_vs_est.o ip_vs_proto.o \
$(ip_vs_proto-objs-y)
$(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)


# IPVS core
Expand Down
13 changes: 13 additions & 0 deletions net/netfilter/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -721,6 +721,9 @@ static void ip_vs_conn_expire(unsigned long data)
if (cp->control)
ip_vs_control_del(cp);

if (cp->flags & IP_VS_CONN_F_NFCT)
ip_vs_conn_drop_conntrack(cp);

if (unlikely(cp->app != NULL))
ip_vs_unbind_app(cp);
ip_vs_unbind_dest(cp);
Expand Down Expand Up @@ -816,6 +819,16 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
if (unlikely(pp && atomic_read(&pp->appcnt)))
ip_vs_bind_app(cp, pp);

/*
* Allow conntrack to be preserved. By default, conntrack
* is created and destroyed for every packet.
* Sometimes keeping conntrack can be useful for
* IP_VS_CONN_F_ONE_PACKET too.
*/

if (ip_vs_conntrack_enabled())
cp->flags |= IP_VS_CONN_F_NFCT;

/* Hash it in the ip_vs_conn_tab finally */
ip_vs_conn_hash(cp);

Expand Down
46 changes: 42 additions & 4 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,23 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
return NF_DROP;
}

/*
* It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
* chain and is used to avoid double NAT and confirmation when we do
* not want to keep the conntrack structure
*/
static unsigned int ip_vs_post_routing(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
if (!skb->ipvs_property)
return NF_ACCEPT;
/* The packet was sent from IPVS, exit this chain */
return NF_STOP;
}

__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
Expand Down Expand Up @@ -695,7 +712,10 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);

skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT))
skb->ipvs_property = 1;
else
ip_vs_update_conntrack(skb, cp, 0);
verdict = NF_ACCEPT;

out:
Expand Down Expand Up @@ -928,17 +948,19 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,

ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
ip_vs_update_conntrack(skb, cp, 0);
if (!(cp->flags & IP_VS_CONN_F_NFCT))
skb->ipvs_property = 1;
else
ip_vs_update_conntrack(skb, cp, 0);
ip_vs_conn_put(cp);

skb->ipvs_property = 1;

LeaveFunction(11);
return NF_ACCEPT;

drop:
ip_vs_conn_put(cp);
kfree_skb(skb);
LeaveFunction(11);
return NF_STOLEN;
}

Expand Down Expand Up @@ -1483,6 +1505,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* Before the netfilter connection tracking, exit from POST_ROUTING */
{
.hook = ip_vs_post_routing,
.owner = THIS_MODULE,
.pf = PF_INET,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC-1,
},
#ifdef CONFIG_IP_VS_IPV6
/* After packet filtering, forward packet through VS/DR, VS/TUN,
* or VS/NAT(change destination), so that filtering rules can be
Expand Down Expand Up @@ -1511,6 +1541,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* Before the netfilter connection tracking, exit from POST_ROUTING */
{
.hook = ip_vs_post_routing,
.owner = THIS_MODULE,
.pf = PF_INET6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC-1,
},
#endif
};

Expand Down
12 changes: 12 additions & 0 deletions net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
#ifdef CONFIG_IP_VS_NFCT
int sysctl_ip_vs_conntrack;
#endif


#ifdef CONFIG_IP_VS_DEBUG
Expand Down Expand Up @@ -1580,6 +1583,15 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_do_defense_mode,
},
#ifdef CONFIG_IP_VS_NFCT
{
.procname = "conntrack",
.data = &sysctl_ip_vs_conntrack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
{
.procname = "secure_tcp",
.data = &sysctl_ip_vs_secure_tcp,
Expand Down
Loading

0 comments on commit f4bc17c

Please sign in to comment.