Skip to content

Commit

Permalink
ipvs: optimize dst usage for real server
Browse files Browse the repository at this point in the history
Currently when forwarding requests to real servers
we use dst_lock and atomic operations when cloning the
dst_cache value. As the dst_cache value does not change
most of the time it is better to use RCU and to lock
dst_lock only when we need to replace the obsoleted dst.
For this to work we keep dst_cache in new structure protected
by RCU. For packets to remote real servers we will use noref
version of dst_cache, it will be valid while we are in RCU
read-side critical section because now dst_release for replaced
dsts will be invoked after the grace period. Packets to
local real servers that are passed to local stack with
NF_ACCEPT need a dst clone.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Simon Horman <horms@verge.net.au>
  • Loading branch information
Julian Anastasov authored and Pablo Neira Ayuso committed Apr 1, 2013
1 parent 4115ded commit 026ace0
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 59 deletions.
12 changes: 9 additions & 3 deletions include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,13 @@ struct ip_vs_service {
struct ip_vs_pe *pe;
};

/* Information for cached dst */
struct ip_vs_dest_dst {
struct dst_entry *dst_cache; /* destination cache entry */
u32 dst_cookie;
union nf_inet_addr dst_saddr;
struct rcu_head rcu_head;
};

/*
* The real server destination forwarding entry
Expand Down Expand Up @@ -752,9 +759,7 @@ struct ip_vs_dest {

/* for destination cache */
spinlock_t dst_lock; /* lock of dst_cache */
struct dst_entry *dst_cache; /* destination cache entry */
u32 dst_cookie;
union nf_inet_addr dst_saddr;
struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */

/* for virtual service */
struct ip_vs_service *svc; /* service it belongs to */
Expand Down Expand Up @@ -1427,6 +1432,7 @@ extern int ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
extern int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, int offset,
unsigned int hooknum, struct ip_vs_iphdr *iph);
extern void ip_vs_dest_dst_rcu_free(struct rcu_head *head);

#ifdef CONFIG_IP_VS_IPV6
extern int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
Expand Down
11 changes: 7 additions & 4 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1395,10 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
spin_lock(&dest->dst_lock);
if (dest->dst_cache)
mtu = dst_mtu(dest->dst_cache);
spin_unlock(&dest->dst_lock);
struct ip_vs_dest_dst *dest_dst;

rcu_read_lock();
dest_dst = rcu_dereference(dest->dest_dst);
if (dest_dst)
mtu = dst_mtu(dest_dst->dst_cache);
rcu_read_unlock();
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
Expand Down
25 changes: 18 additions & 7 deletions net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -641,15 +641,26 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
return dest;
}

/* Release dst_cache for dest in user context */
void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
{
struct ip_vs_dest_dst *dest_dst = container_of(head,
struct ip_vs_dest_dst,
rcu_head);

dst_release(dest_dst->dst_cache);
kfree(dest_dst);
}

/* Release dest_dst and dst_cache for dest in user context */
static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
{
struct dst_entry *old_dst;
struct ip_vs_dest_dst *old;

old_dst = dest->dst_cache;
dest->dst_cache = NULL;
dst_release(old_dst);
dest->dst_saddr.ip = 0;
old = rcu_dereference_protected(dest->dest_dst, 1);
if (old) {
RCU_INIT_POINTER(dest->dest_dst, NULL);
call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
}
}

/*
Expand Down Expand Up @@ -1513,7 +1524,7 @@ static inline void
ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
{
spin_lock_bh(&dest->dst_lock);
if (dest->dst_cache && dest->dst_cache->dev == dev) {
if (dest->dest_dst && dest->dest_dst->dst_cache->dev == dev) {
IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
dev->name,
IP_VS_DBG_ADDR(dest->af, &dest->addr),
Expand Down
Loading

0 comments on commit 026ace0

Please sign in to comment.