Skip to content

Commit

Permalink
Merge branch 'ipvs-next'
Browse files Browse the repository at this point in the history
Simon Horman says:

====================
This pull requests makes the following changes:

* Add simple weighted fail-over scheduler.
  - Unlike other IPVS schedulers this offers fail-over rather than load
    balancing. Connections are directed to the appropriate server based
    solely on highest weight value and server availability.
  - Thanks to Kenny Mathis

* Support IPv6 real servers in IPv4 virtual-services and vice versa
  - This feature is supported in conjunction with the tunnel (IPIP)
    forwarding mechanism. That is, IPv4 may be forwarded in IPv6 and
    vice versa.
  - The motivation for this is to allow more flexibility in the
    choice of IP version offered by both virtual-servers and
    real-servers as they no longer need to match: An IPv4 connection from an
    end-user may be forwarded to a real-server using IPv6 and vice versa.
  - Further work need to be done to support this feature in conjunction
    with connection synchronisation. For now such configurations are
    not allowed.
  - This change includes update to netlink protocol, adding a new
    destination address family attribute. And the necessary changes
    to plumb this information throughout IPVS.
  - Thanks to Alex Gartrell and Julian Anastasov
====================

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  • Loading branch information
Pablo Neira Ayuso committed Sep 18, 2014
2 parents 07034ae + bc18d37 commit fcfa8f4
Show file tree
Hide file tree
Showing 23 changed files with 544 additions and 225 deletions.
15 changes: 12 additions & 3 deletions include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ struct ip_vs_conn {
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
__u16 daf; /* Address family of the dest */
#ifdef CONFIG_NET_NS
struct net *net; /* Name space */
#endif
Expand Down Expand Up @@ -648,6 +649,9 @@ struct ip_vs_dest_user_kern {
/* thresholds for active connections */
u32 u_threshold; /* upper threshold */
u32 l_threshold; /* lower threshold */

/* Address family of addr */
u16 af;
};


Expand Down Expand Up @@ -986,6 +990,10 @@ struct netns_ipvs {
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */
struct net *net; /* Needed by timer routines */
/* Number of heterogeneous destinations, needed because
* heterogeneous are not supported when synchronization is
* enabled */
unsigned int mixed_address_family_dests;
};

#define DEFAULT_SYNC_THRESHOLD 3
Expand Down Expand Up @@ -1210,7 +1218,7 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
void ip_vs_conn_put(struct ip_vs_conn *cp);
void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);

struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark);
Expand Down Expand Up @@ -1396,8 +1404,9 @@ void ip_vs_unregister_nl_ioctl(void);
int ip_vs_control_init(void);
void ip_vs_control_cleanup(void);
struct ip_vs_dest *
ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
__be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
const union nf_inet_addr *daddr, __be16 dport,
const union nf_inet_addr *vaddr, __be16 vport,
__u16 protocol, __u32 fwmark, __u32 flags);
void ip_vs_try_bind_dest(struct ip_vs_conn *cp);

Expand Down
3 changes: 3 additions & 0 deletions include/uapi/linux/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,9 @@ enum {
IPVS_DEST_ATTR_PERSIST_CONNS, /* persistent connections */

IPVS_DEST_ATTR_STATS, /* nested attribute for dest stats */

IPVS_DEST_ATTR_ADDR_FAMILY, /* Address family of address */

__IPVS_DEST_ATTR_MAX,
};

Expand Down
10 changes: 10 additions & 0 deletions net/netfilter/ipvs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,16 @@ config IP_VS_WLC
If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.

config IP_VS_FO
tristate "weighted failover scheduling"
---help---
The weighted failover scheduling algorithm directs network
connections to the server with the highest weight that is
currently available.

If you want to compile it in kernel, say Y. To compile it as a
module, choose M here. If unsure, say N.

config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
---help---
Expand Down
1 change: 1 addition & 0 deletions net/netfilter/ipvs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ obj-$(CONFIG_IP_VS_RR) += ip_vs_rr.o
obj-$(CONFIG_IP_VS_WRR) += ip_vs_wrr.o
obj-$(CONFIG_IP_VS_LC) += ip_vs_lc.o
obj-$(CONFIG_IP_VS_WLC) += ip_vs_wlc.o
obj-$(CONFIG_IP_VS_FO) += ip_vs_fo.o
obj-$(CONFIG_IP_VS_LBLC) += ip_vs_lblc.o
obj-$(CONFIG_IP_VS_LBLCR) += ip_vs_lblcr.o
obj-$(CONFIG_IP_VS_DH) += ip_vs_dh.o
Expand Down
74 changes: 58 additions & 16 deletions net/netfilter/ipvs/ip_vs_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/net.h>
#include <linux/kernel.h>
#include <linux/module.h>
Expand Down Expand Up @@ -77,6 +78,13 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)

/* We need an addrstrlen that works with or without v6 */
#ifdef CONFIG_IP_VS_IPV6
#define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN
#else
#define IP_VS_ADDRSTRLEN (8+1)
#endif

struct ip_vs_aligned_lock
{
spinlock_t l;
Expand Down Expand Up @@ -488,7 +496,12 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp)
break;

case IP_VS_CONN_F_TUNNEL:
cp->packet_xmit = ip_vs_tunnel_xmit;
#ifdef CONFIG_IP_VS_IPV6
if (cp->daf == AF_INET6)
cp->packet_xmit = ip_vs_tunnel_xmit_v6;
else
#endif
cp->packet_xmit = ip_vs_tunnel_xmit;
break;

case IP_VS_CONN_F_DROUTE:
Expand All @@ -514,7 +527,10 @@ static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp)
break;

case IP_VS_CONN_F_TUNNEL:
cp->packet_xmit = ip_vs_tunnel_xmit_v6;
if (cp->daf == AF_INET6)
cp->packet_xmit = ip_vs_tunnel_xmit_v6;
else
cp->packet_xmit = ip_vs_tunnel_xmit;
break;

case IP_VS_CONN_F_DROUTE:
Expand Down Expand Up @@ -580,7 +596,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
Expand Down Expand Up @@ -616,7 +632,13 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;

rcu_read_lock();
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,

/* This function is only invoked by the synchronization code. We do
* not currently support heterogeneous pools with synchronization,
* so we can make the assumption that the svc_af is the same as the
* dest_af
*/
dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
Expand Down Expand Up @@ -671,7 +693,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
ip_vs_proto_name(cp->protocol),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
ip_vs_fwd_tag(cp), cp->state,
cp->flags, atomic_read(&cp->refcnt),
atomic_read(&dest->refcnt));
Expand Down Expand Up @@ -740,7 +762,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
ntohs(ct->cport),
IP_VS_DBG_ADDR(ct->af, &ct->vaddr),
ntohs(ct->vport),
IP_VS_DBG_ADDR(ct->af, &ct->daddr),
IP_VS_DBG_ADDR(ct->daf, &ct->daddr),
ntohs(ct->dport));

/*
Expand Down Expand Up @@ -848,7 +870,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
* Create a new connection entry and hash it into the ip_vs_conn_tab
*/
struct ip_vs_conn *
ip_vs_conn_new(const struct ip_vs_conn_param *p,
ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
const union nf_inet_addr *daddr, __be16 dport, unsigned int flags,
struct ip_vs_dest *dest, __u32 fwmark)
{
Expand All @@ -867,14 +889,15 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->daf = dest_af;
cp->protocol = p->protocol;
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
/* proto should only be IPPROTO_IP if p->vaddr is a fwmark */
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
&cp->vaddr, p->vaddr);
cp->vport = p->vport;
ip_vs_addr_set(p->af, &cp->daddr, daddr);
ip_vs_addr_set(cp->daf, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
Expand Down Expand Up @@ -1036,6 +1059,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
char dbuf[IP_VS_ADDRSTRLEN];

if (!ip_vs_conn_net_eq(cp, net))
return 0;
Expand All @@ -1049,25 +1073,33 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
}
pe_data[len] = '\0';

#ifdef CONFIG_IP_VS_IPV6
if (cp->daf == AF_INET6)
snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
else
#endif
snprintf(dbuf, sizeof(dbuf), "%08X",
ntohl(cp->daddr.ip));

#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
"%pI6 %04X %-11s %7lu%s\n",
"%s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
&cp->daddr.in6, ntohs(cp->dport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X"
" %08X %04X %-11s %7lu%s\n",
" %s %04X %-11s %7lu%s\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
ntohl(cp->daddr.ip), ntohs(cp->dport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
(cp->timer.expires-jiffies)/HZ, pe_data);
}
Expand Down Expand Up @@ -1105,6 +1137,7 @@ static const char *ip_vs_origin_name(unsigned int flags)

static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
{
char dbuf[IP_VS_ADDRSTRLEN];

if (v == SEQ_START_TOKEN)
seq_puts(seq,
Expand All @@ -1116,25 +1149,34 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
if (!ip_vs_conn_net_eq(cp, net))
return 0;

#ifdef CONFIG_IP_VS_IPV6
if (cp->daf == AF_INET6)
snprintf(dbuf, sizeof(dbuf), "%pI6", &cp->daddr.in6);
else
#endif
snprintf(dbuf, sizeof(dbuf), "%08X",
ntohl(cp->daddr.ip));

#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n",
seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
"%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
&cp->caddr.in6, ntohs(cp->cport),
&cp->vaddr.in6, ntohs(cp->vport),
&cp->daddr.in6, ntohs(cp->dport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
else
#endif
seq_printf(seq,
"%-3s %08X %04X %08X %04X "
"%08X %04X %-11s %-6s %7lu\n",
"%s %04X %-11s %-6s %7lu\n",
ip_vs_proto_name(cp->protocol),
ntohl(cp->caddr.ip), ntohs(cp->cport),
ntohl(cp->vaddr.ip), ntohs(cp->vport),
ntohl(cp->daddr.ip), ntohs(cp->dport),
dbuf, ntohs(cp->dport),
ip_vs_state_name(cp->protocol, cp->state),
ip_vs_origin_name(cp->flags),
(cp->timer.expires-jiffies)/HZ);
Expand Down
15 changes: 8 additions & 7 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* This adds param.pe_data to the template,
* and thus param.pe_data will be destroyed
* when the template expires */
ct = ip_vs_conn_new(&param, &dest->addr, dport,
ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
Expand Down Expand Up @@ -357,7 +357,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
src_port, &iph->daddr, dst_port, &param);

cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
*ignored = -1;
Expand Down Expand Up @@ -479,7 +480,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0], &iph->daddr,
pptr[1], &p);
cp = ip_vs_conn_new(&p, &dest->addr,
cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
if (!cp) {
Expand All @@ -491,9 +492,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
"d:%s:%u conn->flags:%X conn->refcnt:%d\n",
ip_vs_fwd_tag(cp),
IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
cp->flags, atomic_read(&cp->refcnt));

ip_vs_conn_stats(cp, svc);
Expand Down Expand Up @@ -550,7 +551,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
NULL, skb->mark);
if (!cp)
Expand Down
Loading

0 comments on commit fcfa8f4

Please sign in to comment.