Skip to content

Commit

Permalink
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/jesse/openvswitch

Jesse Gross says:

====================
A number of significant new features and optimizations for net-next/3.12.
Highlights are:
 * "Megaflows", an optimization that allows userspace to specify which
   flow fields were used to compute the results of the flow lookup.
   This allows for a major reduction in flow setups (the major
   performance bottleneck in Open vSwitch) without reducing flexibility.
 * Converting netlink dump operations to use RCU, allowing for
   additional parallelism in userspace.
 * Matching and modifying SCTP protocol fields.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 28, 2013
2 parents b6750b4 + 5828cd9 commit 5b2941b
Show file tree
Hide file tree
Showing 13 changed files with 1,346 additions and 543 deletions.
40 changes: 40 additions & 0 deletions Documentation/networking/openvswitch.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,46 @@ Often we ellipsize arguments not important to the discussion, e.g.:
in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)


Wildcarded flow key format
--------------------------

A wildcarded flow is described with two sequences of Netlink attributes
passed over the Netlink socket. A flow key, exactly as described above, and an
optional corresponding flow mask.

A wildcarded flow can represent a group of exact match flows. Each '1' bit
in the mask specifies a exact match with the corresponding bit in the flow key.
A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
of a incoming packet. Using wildcarded flow can improve the flow set up rate
by reduce the number of new flows need to be processed by the user space program.

Support for the mask Netlink attribute is optional for both the kernel and user
space program. The kernel can ignore the mask attribute, installing an exact
match flow, or reduce the number of don't care bits in the kernel to less than
what was specified by the user space program. In this case, variations in bits
that the kernel does not implement will simply result in additional flow setups.
The kernel module will also work with user space programs that neither support
nor supply flow mask attributes.

Since the kernel may ignore or modify wildcard bits, it can be difficult for
the userspace program to know exactly what matches are installed. There are
two possible approaches: reactively install flows as they miss the kernel
flow table (and therefore not attempt to determine wildcard changes at all)
or use the kernel's response messages to determine the installed wildcards.

When interacting with userspace, the kernel should maintain the match portion
of the key exactly as originally installed. This will provides a handle to
identify the flow for all future operations. However, when reporting the
mask of an installed flow, the mask should include any restrictions imposed
by the kernel.

The behavior when using overlapping wildcarded flows is undefined. It is the
responsibility of the user space program to ensure that any incoming packet
can match at most one flow, wildcarded or not. The current implementation
performs best-effort detection of overlapping wildcarded flows and may reject
some but not all of them. However, this behavior may change in future versions.


Basic rule for evolving flow keys
---------------------------------

Expand Down
1 change: 1 addition & 0 deletions include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#define NEXTHDR_ICMP 58 /* ICMP for IPv6. */
#define NEXTHDR_NONE 59 /* No next header */
#define NEXTHDR_DEST 60 /* Destination options header. */
#define NEXTHDR_SCTP 132 /* SCTP message. */
#define NEXTHDR_MOBILITY 135 /* Mobility header. */

#define NEXTHDR_MAX 255
Expand Down
15 changes: 14 additions & 1 deletion include/uapi/linux/openvswitch.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

/*
* Copyright (c) 2007-2011 Nicira Networks.
* Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
Expand Down Expand Up @@ -259,6 +259,7 @@ enum ovs_key_attr {
OVS_KEY_ATTR_ND, /* struct ovs_key_nd */
OVS_KEY_ATTR_SKB_MARK, /* u32 skb mark */
OVS_KEY_ATTR_TUNNEL, /* Nested set of ovs_tunnel attributes */
OVS_KEY_ATTR_SCTP, /* struct ovs_key_sctp */

#ifdef __KERNEL__
OVS_KEY_ATTR_IPV4_TUNNEL, /* struct ovs_key_ipv4_tunnel */
Expand Down Expand Up @@ -333,6 +334,11 @@ struct ovs_key_udp {
__be16 udp_dst;
};

struct ovs_key_sctp {
__be16 sctp_src;
__be16 sctp_dst;
};

struct ovs_key_icmp {
__u8 icmp_type;
__u8 icmp_code;
Expand Down Expand Up @@ -379,6 +385,12 @@ struct ovs_key_nd {
* @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
* last-used time, accumulated TCP flags, and statistics for this flow.
* Otherwise ignored in requests. Never present in notifications.
* @OVS_FLOW_ATTR_MASK: Nested %OVS_KEY_ATTR_* attributes specifying the
* mask bits for wildcarded flow match. Mask bit value '1' specifies exact
* match with corresponding flow key bit, while mask bit value '0' specifies
* a wildcarded match. Omitting attribute is treated as wildcarding all
* corresponding fields. Optional for all requests. If not present,
* all flow key bits are exact match bits.
*
* These attributes follow the &struct ovs_header within the Generic Netlink
* payload for %OVS_FLOW_* commands.
Expand All @@ -391,6 +403,7 @@ enum ovs_flow_attr {
OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
OVS_FLOW_ATTR_USED, /* u64 msecs last used in monotonic time. */
OVS_FLOW_ATTR_CLEAR, /* Flag to clear stats, tcp_flags, used. */
OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */
__OVS_FLOW_ATTR_MAX
};

Expand Down
1 change: 1 addition & 0 deletions net/openvswitch/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

config OPENVSWITCH
tristate "Open vSwitch"
select LIBCRC32C
---help---
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
Expand Down
5 changes: 4 additions & 1 deletion net/openvswitch/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ openvswitch-y := \
dp_notify.o \
flow.o \
vport.o \
vport-gre.o \
vport-internal_dev.o \
vport-netdev.o

ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
openvswitch-y += vport-vxlan.o
endif

ifneq ($(CONFIG_OPENVSWITCH_GRE),)
openvswitch-y += vport-gre.o
endif
45 changes: 43 additions & 2 deletions net/openvswitch/actions.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2007-2012 Nicira, Inc.
* Copyright (c) 2007-2013 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
Expand All @@ -22,6 +22,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/in6.h>
Expand All @@ -31,6 +32,7 @@
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/sctp/checksum.h>

#include "datapath.h"
#include "vport.h"
Expand Down Expand Up @@ -352,6 +354,39 @@ static int set_tcp(struct sk_buff *skb, const struct ovs_key_tcp *tcp_port_key)
return 0;
}

static int set_sctp(struct sk_buff *skb,
const struct ovs_key_sctp *sctp_port_key)
{
struct sctphdr *sh;
int err;
unsigned int sctphoff = skb_transport_offset(skb);

err = make_writable(skb, sctphoff + sizeof(struct sctphdr));
if (unlikely(err))
return err;

sh = sctp_hdr(skb);
if (sctp_port_key->sctp_src != sh->source ||
sctp_port_key->sctp_dst != sh->dest) {
__le32 old_correct_csum, new_csum, old_csum;

old_csum = sh->checksum;
old_correct_csum = sctp_compute_cksum(skb, sctphoff);

sh->source = sctp_port_key->sctp_src;
sh->dest = sctp_port_key->sctp_dst;

new_csum = sctp_compute_cksum(skb, sctphoff);

/* Carry any checksum errors through. */
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;

skb->rxhash = 0;
}

return 0;
}

static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
struct vport *vport;
Expand All @@ -376,8 +411,10 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *a;
int rem;

BUG_ON(!OVS_CB(skb)->pkt_key);

upcall.cmd = OVS_PACKET_CMD_ACTION;
upcall.key = &OVS_CB(skb)->flow->key;
upcall.key = OVS_CB(skb)->pkt_key;
upcall.userdata = NULL;
upcall.portid = 0;

Expand Down Expand Up @@ -459,6 +496,10 @@ static int execute_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_UDP:
err = set_udp(skb, nla_data(nested_attr));
break;

case OVS_KEY_ATTR_SCTP:
err = set_sctp(skb, nla_data(nested_attr));
break;
}

return err;
Expand Down
Loading

0 comments on commit 5b2941b

Please sign in to comment.