Skip to content

Commit

Permalink
ipvs: SCTP Transport Load Balancing Support
Browse files Browse the repository at this point in the history
Enhance IPVS to load balance SCTP transport protocol packets. This is done
based on the SCTP specification, RFC 4960. All possible control chunks have
been taken care of. The state machine used in this code looks somewhat
lengthy; I tried to make it easy to understand.

Signed-off-by: Venkata Mohan Reddy Koppula <mohanreddykv@gmail.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
  • Loading branch information
Venkata Mohan Reddy authored and Patrick McHardy committed Feb 18, 2010
1 parent 477c608 commit 2906f66
Show file tree
Hide file tree
Showing 8 changed files with 1,285 additions and 12 deletions.
22 changes: 21 additions & 1 deletion include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,26 @@ enum {
IP_VS_ICMP_S_LAST,
};

/*
* SCTP State Values
*
* States that IPVS records for an SCTP connection. ip_vs_in() compares
* cp->state against several of these (ESTABLISHED, CLOSED, SHUT_ACK_CLI,
* SHUT_ACK_SER) when deciding whether to sync a connection.
*
* The enumerators rely on implicit numbering starting at 0, so their
* order determines their values.
*
* NOTE(review): the _CLI/_SER suffixes presumably indicate whether the
* client or the server sent the chunk that led to the state - confirm
* against the state tables in ip_vs_proto_sctp.c.
*/
enum ip_vs_sctp_states {
IP_VS_SCTP_S_NONE,
IP_VS_SCTP_S_INIT_CLI,
IP_VS_SCTP_S_INIT_SER,
IP_VS_SCTP_S_INIT_ACK_CLI,
IP_VS_SCTP_S_INIT_ACK_SER,
IP_VS_SCTP_S_ECHO_CLI,
IP_VS_SCTP_S_ECHO_SER,
IP_VS_SCTP_S_ESTABLISHED,
IP_VS_SCTP_S_SHUT_CLI,
IP_VS_SCTP_S_SHUT_SER,
IP_VS_SCTP_S_SHUT_ACK_CLI,
IP_VS_SCTP_S_SHUT_ACK_SER,
IP_VS_SCTP_S_CLOSED,
/* Last enumerator: its value equals the number of states listed above. */
IP_VS_SCTP_S_LAST
};

/*
* Delta sequence info structure
* Each ip_vs_conn has 2 (output AND input seq. changes).
Expand Down Expand Up @@ -741,7 +761,7 @@ extern struct ip_vs_protocol ip_vs_protocol_udp;
extern struct ip_vs_protocol ip_vs_protocol_icmp;
extern struct ip_vs_protocol ip_vs_protocol_esp;
extern struct ip_vs_protocol ip_vs_protocol_ah;

extern struct ip_vs_protocol ip_vs_protocol_sctp;

/*
* Registering/unregistering scheduler functions
Expand Down
7 changes: 7 additions & 0 deletions net/netfilter/ipvs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ config IP_VS_PROTO_AH
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.

# SCTP protocol support for IPVS. Enabling this option also selects
# LIBCRC32C (presumably for the CRC32c checksum carried by SCTP packets -
# confirm against ip_vs_proto_sctp.c).
config IP_VS_PROTO_SCTP
bool "SCTP load balancing support"
select LIBCRC32C
---help---
This option enables support for load balancing SCTP transport
protocol. Say Y if unsure.

comment "IPVS scheduler"

config IP_VS_RR
Expand Down
1 change: 1 addition & 0 deletions net/netfilter/ipvs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ ip_vs_proto-objs-y :=
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o

ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
Expand Down
62 changes: 53 additions & 9 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sctp.h>
#include <linux/icmp.h>

#include <net/ip.h>
Expand Down Expand Up @@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto)
return "UDP";
case IPPROTO_TCP:
return "TCP";
case IPPROTO_SCTP:
return "SCTP";
case IPPROTO_ICMP:
return "ICMP";
#ifdef CONFIG_IP_VS_IPV6
Expand Down Expand Up @@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
ip_send_check(ciph);
}

/* the TCP/UDP port */
if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
/* the TCP/UDP/SCTP port */
if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
IPPROTO_SCTP == ciph->protocol) {
__be16 *ports = (void *)ciph + ciph->ihl*4;

if (inout)
Expand Down Expand Up @@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
ciph->saddr = cp->daddr.in6;
}

/* the TCP/UDP port */
if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
/* the TCP/UDP/SCTP port */
if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
IPPROTO_SCTP == ciph->nexthdr) {
__be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);

if (inout)
Expand Down Expand Up @@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
goto out;
}

if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
IPPROTO_SCTP == protocol)
offset += 2 * sizeof(__u16);
if (!skb_make_writable(skb, offset))
goto out;
Expand Down Expand Up @@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
}
#endif

/*
 * Report whether the first chunk of an SCTP packet is an ABORT chunk.
 *
 * @skb:    packet to inspect; it is only read
 * @nh_len: offset of the SCTP common header within @skb (callers pass the
 *          length of the network-layer header)
 *
 * Only the chunk header that immediately follows the SCTP common header is
 * examined; any further chunks in the packet are not looked at.
 *
 * Returns 1 when that chunk has type SCTP_CID_ABORT, and 0 otherwise,
 * including when skb_header_pointer() cannot provide the chunk header.
 */
static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
{
	sctp_chunkhdr_t chunk_buf;
	const sctp_chunkhdr_t *first_chunk;
	/* The first chunk header sits right behind the SCTP common header. */
	const int chunk_off = nh_len + sizeof(sctp_sctphdr_t);

	first_chunk = skb_header_pointer(skb, chunk_off,
					 sizeof(chunk_buf), &chunk_buf);

	return first_chunk != NULL && first_chunk->type == SCTP_CID_ABORT;
}

static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
{
struct tcphdr _tcph, *th;
Expand Down Expand Up @@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
if (unlikely(!cp)) {
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP)) {
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
__be16 _ports[2], *pptr;

pptr = skb_header_pointer(skb, iph.len,
Expand All @@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
* existing entry if it is neither a TCP RST
* packet nor an SCTP ABORT packet.
*/
if (iph.protocol != IPPROTO_TCP
|| !is_tcp_reset(skb, iph.len)) {
if ((iph.protocol != IPPROTO_TCP &&
iph.protocol != IPPROTO_SCTP)
|| ((iph.protocol == IPPROTO_TCP
&& !is_tcp_reset(skb, iph.len))
|| (iph.protocol == IPPROTO_SCTP
&& !is_sctp_abort(skb,
iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
icmpv6_send(skb,
Expand Down Expand Up @@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)

/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
IPPROTO_SCTP == cih->nexthdr)
offset += 2 * sizeof(__u16);
verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
/* do not touch skb anymore */
Expand Down Expand Up @@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
* encourage the standby servers to update the connections timeout
*/
pkts = atomic_add_return(1, &cp->in_pkts);
if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(atomic_read(&cp->in_pkts) %
sysctl_ip_vs_sync_threshold[1]
== sysctl_ip_vs_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
ip_vs_sync_conn(cp);
goto out;
}
}

if (af == AF_INET &&
(ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
Expand All @@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
ip_vs_sync_conn(cp);
out:
cp->old_state = cp->state;

ip_vs_conn_put(cp);
Expand Down
5 changes: 3 additions & 2 deletions net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2132,8 +2132,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
}

/* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
usvc.protocol != IPPROTO_SCTP) {
pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
usvc.protocol, &usvc.addr.ip,
ntohs(usvc.port), usvc.sched_name);
Expand Down
3 changes: 3 additions & 0 deletions net/netfilter/ipvs/ip_vs_proto.c
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,9 @@ int __init ip_vs_protocol_init(void)
#ifdef CONFIG_IP_VS_PROTO_UDP
REGISTER_PROTOCOL(&ip_vs_protocol_udp);
#endif
#ifdef CONFIG_IP_VS_PROTO_SCTP
REGISTER_PROTOCOL(&ip_vs_protocol_sctp);
#endif
#ifdef CONFIG_IP_VS_PROTO_AH
REGISTER_PROTOCOL(&ip_vs_protocol_ah);
#endif
Expand Down
Loading

0 comments on commit 2906f66

Please sign in to comment.