Skip to content

Commit

Permalink
openvswitch: Per cpu flow stats.
Browse files Browse the repository at this point in the history
With mega flow implementation ovs flow can be shared between
multiple CPUs which makes stats updates highly contended
operation. This patch uses per-CPU stats in cases where a flow
is likely to be shared (if there is a wildcard in the 5-tuple
and therefore likely to be spread by RSS). In other situations,
it uses the current strategy, saving memory and allocation time.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
  • Loading branch information
Pravin B Shelar authored and Jesse Gross committed Jan 6, 2014
1 parent 795449d commit e298e50
Show file tree
Hide file tree
Showing 7 changed files with 210 additions and 55 deletions.
50 changes: 16 additions & 34 deletions net/openvswitch/datapath.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,9 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
OVS_CB(skb)->flow = flow;
OVS_CB(skb)->pkt_key = &key;

stats_counter = &stats->n_hit;
ovs_flow_used(OVS_CB(skb)->flow, skb);
ovs_flow_stats_update(OVS_CB(skb)->flow, skb);
ovs_execute_actions(dp, skb);
stats_counter = &stats->n_hit;

out:
/* Update datapath statistics. */
Expand Down Expand Up @@ -459,14 +459,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,
return err;
}

static void clear_stats(struct sw_flow *flow)
{
flow->used = 0;
flow->tcp_flags = 0;
flow->packet_count = 0;
flow->byte_count = 0;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
struct ovs_header *ovs_header = info->userhdr;
Expand Down Expand Up @@ -505,7 +497,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->protocol = htons(ETH_P_802_2);

/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
flow = ovs_flow_alloc(false);
err = PTR_ERR(flow);
if (IS_ERR(flow))
goto err_kfree_skb;
Expand Down Expand Up @@ -641,10 +633,10 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
const int skb_orig_len = skb->len;
struct nlattr *start;
struct ovs_flow_stats stats;
__be16 tcp_flags;
unsigned long used;
struct ovs_header *ovs_header;
struct nlattr *nla;
unsigned long used;
u8 tcp_flags;
int err;

ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
Expand Down Expand Up @@ -673,24 +665,17 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,

nla_nest_end(skb, nla);

spin_lock_bh(&flow->lock);
used = flow->used;
stats.n_packets = flow->packet_count;
stats.n_bytes = flow->byte_count;
tcp_flags = (u8)ntohs(flow->tcp_flags);
spin_unlock_bh(&flow->lock);

ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
if (used &&
nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
goto nla_put_failure;

if (stats.n_packets &&
nla_put(skb, OVS_FLOW_ATTR_STATS,
sizeof(struct ovs_flow_stats), &stats))
nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
goto nla_put_failure;

if (tcp_flags &&
nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
if ((u8)ntohs(tcp_flags) &&
nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
goto nla_put_failure;

/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
Expand Down Expand Up @@ -770,6 +755,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct sw_flow_actions *acts = NULL;
struct sw_flow_match match;
bool exact_5tuple;
int error;

/* Extract key. */
Expand All @@ -778,7 +764,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;

ovs_match_init(&match, &key, &mask);
error = ovs_nla_get_match(&match,
error = ovs_nla_get_match(&match, &exact_5tuple,
a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
goto error;
Expand Down Expand Up @@ -817,12 +803,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;

/* Allocate flow. */
flow = ovs_flow_alloc();
flow = ovs_flow_alloc(!exact_5tuple);
if (IS_ERR(flow)) {
error = PTR_ERR(flow);
goto err_unlock_ovs;
}
clear_stats(flow);

flow->key = masked_key;
flow->unmasked_key = key;
Expand Down Expand Up @@ -866,11 +851,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
reply = ovs_flow_cmd_build_info(flow, dp, info, OVS_FLOW_CMD_NEW);

/* Clear stats. */
if (a[OVS_FLOW_ATTR_CLEAR]) {
spin_lock_bh(&flow->lock);
clear_stats(flow);
spin_unlock_bh(&flow->lock);
}
if (a[OVS_FLOW_ATTR_CLEAR])
ovs_flow_stats_clear(flow);
}
ovs_unlock();

Expand Down Expand Up @@ -908,7 +890,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}

ovs_match_init(&match, &key, NULL);
err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;

Expand Down Expand Up @@ -962,7 +944,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
}

ovs_match_init(&match, &key, NULL);
err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
goto unlock;

Expand Down
96 changes: 89 additions & 7 deletions net/openvswitch/flow.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/sctp.h>
#include <linux/smp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
Expand All @@ -60,23 +61,104 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies)

#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))

void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
{
struct flow_stats *stats;
__be16 tcp_flags = 0;

if (!flow->stats.is_percpu)
stats = flow->stats.stat;
else
stats = this_cpu_ptr(flow->stats.cpu_stats);

if ((flow->key.eth.type == htons(ETH_P_IP) ||
flow->key.eth.type == htons(ETH_P_IPV6)) &&
flow->key.ip.proto == IPPROTO_TCP &&
likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
}

spin_lock(&flow->lock);
flow->used = jiffies;
flow->packet_count++;
flow->byte_count += skb->len;
flow->tcp_flags |= tcp_flags;
spin_unlock(&flow->lock);
spin_lock(&stats->lock);
stats->used = jiffies;
stats->packet_count++;
stats->byte_count += skb->len;
stats->tcp_flags |= tcp_flags;
spin_unlock(&stats->lock);
}

static void stats_read(struct flow_stats *stats,
struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
spin_lock(&stats->lock);
if (time_after(stats->used, *used))
*used = stats->used;
*tcp_flags |= stats->tcp_flags;
ovs_stats->n_packets += stats->packet_count;
ovs_stats->n_bytes += stats->byte_count;
spin_unlock(&stats->lock);
}

void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
int cpu, cur_cpu;

*used = 0;
*tcp_flags = 0;
memset(ovs_stats, 0, sizeof(*ovs_stats));

if (!flow->stats.is_percpu) {
stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
} else {
cur_cpu = get_cpu();
for_each_possible_cpu(cpu) {
struct flow_stats *stats;

if (cpu == cur_cpu)
local_bh_disable();

stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
stats_read(stats, ovs_stats, used, tcp_flags);

if (cpu == cur_cpu)
local_bh_enable();
}
put_cpu();
}
}

static void stats_reset(struct flow_stats *stats)
{
spin_lock(&stats->lock);
stats->used = 0;
stats->packet_count = 0;
stats->byte_count = 0;
stats->tcp_flags = 0;
spin_unlock(&stats->lock);
}

void ovs_flow_stats_clear(struct sw_flow *flow)
{
int cpu, cur_cpu;

if (!flow->stats.is_percpu) {
stats_reset(flow->stats.stat);
} else {
cur_cpu = get_cpu();

for_each_possible_cpu(cpu) {

if (cpu == cur_cpu)
local_bh_disable();

stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu));

if (cpu == cur_cpu)
local_bh_enable();
}
put_cpu();
}
}

static int check_header(struct sk_buff *skb, int len)
Expand Down
29 changes: 22 additions & 7 deletions net/openvswitch/flow.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#ifndef FLOW_H
#define FLOW_H 1

#include <linux/cache.h>
#include <linux/kernel.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>
Expand Down Expand Up @@ -146,6 +147,22 @@ struct sw_flow_actions {
struct nlattr actions[];
};

struct flow_stats {
u64 packet_count; /* Number of packets matched. */
u64 byte_count; /* Number of bytes matched. */
unsigned long used; /* Last used time (in jiffies). */
spinlock_t lock; /* Lock for atomic stats update. */
__be16 tcp_flags; /* Union of seen TCP flags. */
};

struct sw_flow_stats {
bool is_percpu;
union {
struct flow_stats *stat;
struct flow_stats __percpu *cpu_stats;
};
};

struct sw_flow {
struct rcu_head rcu;
struct hlist_node hash_node[2];
Expand All @@ -155,12 +172,7 @@ struct sw_flow {
struct sw_flow_key unmasked_key;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;

spinlock_t lock; /* Lock for values below. */
unsigned long used; /* Last used time (in jiffies). */
u64 packet_count; /* Number of packets matched. */
u64 byte_count; /* Number of bytes matched. */
__be16 tcp_flags; /* Union of seen TCP flags. */
struct sw_flow_stats stats;
};

struct arp_eth_header {
Expand All @@ -177,7 +189,10 @@ struct arp_eth_header {
unsigned char ar_tip[4]; /* target IP address */
} __packed;

void ovs_flow_used(struct sw_flow *, struct sk_buff *);
void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb);
void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *stats,
unsigned long *used, __be16 *tcp_flags);
void ovs_flow_stats_clear(struct sw_flow *flow);
u64 ovs_flow_used_time(unsigned long flow_jiffies);

int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *);
Expand Down
Loading

0 comments on commit e298e50

Please sign in to comment.