Skip to content

Commit

Permalink
openvswitch: Fix egress tunnel info.
Browse files Browse the repository at this point in the history
While transitioning to netdev based vport we broke the OVS
feature which allows a user to retrieve tunnel packet egress
information for lwtunnel devices.  The following patch fixes it
by introducing an ndo operation to get the tunnel egress info.
The same ndo operation can be used for lwtunnel devices and compat
ovs-tnl-vport devices. So after adding such a device operation
we can remove the similar operation from ovs-vport.

Fixes: 614732e ("openvswitch: Use regular VXLAN net_device device").
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Pravin B Shelar authored and David S. Miller committed Oct 23, 2015
1 parent 0c472b9 commit fc4099f
Show file tree
Hide file tree
Showing 16 changed files with 192 additions and 173 deletions.
40 changes: 33 additions & 7 deletions drivers/net/geneve.c
Original file line number Diff line number Diff line change
Expand Up @@ -594,14 +594,12 @@ static struct rtable *geneve_get_rt(struct sk_buff *skb,
rt = ip_route_output_key(geneve->net, fl4);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
dev->stats.tx_carrier_errors++;
return rt;
return ERR_PTR(-ENETUNREACH);
}
if (rt->dst.dev == dev) { /* is this necessary? */
netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
dev->stats.collisions++;
ip_rt_put(rt);
return ERR_PTR(-EINVAL);
return ERR_PTR(-ELOOP);
}
return rt;
}
Expand All @@ -627,12 +625,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel_info *info = NULL;
struct rtable *rt = NULL;
const struct iphdr *iip; /* interior IP header */
int err = -EINVAL;
struct flowi4 fl4;
__u8 tos, ttl;
__be16 sport;
bool udp_csum;
__be16 df;
int err;

if (geneve->collect_md) {
info = skb_tunnel_info(skb);
Expand All @@ -647,7 +645,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
rt = geneve_get_rt(skb, dev, &fl4, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
dev->stats.tx_carrier_errors++;
err = PTR_ERR(rt);
goto tx_error;
}

Expand Down Expand Up @@ -699,10 +697,37 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
tx_error:
dev_kfree_skb(skb);
err:
dev->stats.tx_errors++;
if (err == -ELOOP)
dev->stats.collisions++;
else if (err == -ENETUNREACH)
dev->stats.tx_carrier_errors++;
else
dev->stats.tx_errors++;
return NETDEV_TX_OK;
}

static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
struct geneve_dev *geneve = netdev_priv(dev);
struct rtable *rt;
struct flowi4 fl4;

if (ip_tunnel_info_af(info) != AF_INET)
return -EINVAL;

rt = geneve_get_rt(skb, dev, &fl4, info);
if (IS_ERR(rt))
return PTR_ERR(rt);

ip_rt_put(rt);
info->key.u.ipv4.src = fl4.saddr;
info->key.tp_src = udp_flow_src_port(geneve->net, skb,
1, USHRT_MAX, true);
info->key.tp_dst = geneve->dst_port;
return 0;
}

static const struct net_device_ops geneve_netdev_ops = {
.ndo_init = geneve_init,
.ndo_uninit = geneve_uninit,
Expand All @@ -713,6 +738,7 @@ static const struct net_device_ops geneve_netdev_ops = {
.ndo_change_mtu = eth_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
.ndo_fill_metadata_dst = geneve_fill_metadata_dst,
};

static void geneve_get_drvinfo(struct net_device *dev,
Expand Down
41 changes: 41 additions & 0 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -2337,6 +2337,46 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}

/*
 * Resolve the IPv4 egress parameters for a VXLAN tunnel packet.
 *
 * Builds a UDP flow towards the tunnel destination in @info (using the
 * tunnel TOS and the skb mark), performs a route lookup in the VXLAN
 * device's network namespace and, on success, writes the flow's source
 * address together with @sport and @dport back into @info->key.
 *
 * Returns 0 on success or the error carried by the failed route lookup.
 */
static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
				struct ip_tunnel_info *info,
				__be16 sport, __be16 dport)
{
	struct vxlan_dev *vx = netdev_priv(dev);
	struct rtable *route;
	struct flowi4 flow;

	memset(&flow, 0, sizeof(flow));
	flow.daddr = info->key.u.ipv4.dst;
	flow.flowi4_proto = IPPROTO_UDP;
	flow.flowi4_mark = skb->mark;
	flow.flowi4_tos = RT_TOS(info->key.tos);

	route = ip_route_output_key(vx->net, &flow);
	if (IS_ERR(route))
		return PTR_ERR(route);

	/* The route itself is not kept; only flow.saddr is used below. */
	ip_rt_put(route);

	info->key.tp_dst = dport;
	info->key.tp_src = sport;
	info->key.u.ipv4.src = flow.saddr;
	return 0;
}

/*
 * ndo_fill_metadata_dst handler for VXLAN devices.
 *
 * Computes the UDP source port from the device's configured port range
 * and picks the destination port from the tunnel key, falling back to
 * the device's configured port when the key holds zero.  Only IPv4
 * tunnel info is supported; other address families return -EINVAL.
 */
static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
	struct vxlan_dev *vx = netdev_priv(dev);
	__be16 src_port, dst_port;

	/* Ports are computed before the family check, as in the original. */
	src_port = udp_flow_src_port(dev_net(dev), skb, vx->cfg.port_min,
				     vx->cfg.port_max, true);

	dst_port = tun_info->key.tp_dst;
	if (!dst_port)
		dst_port = vx->cfg.dst_port;

	if (ip_tunnel_info_af(tun_info) != AF_INET)
		return -EINVAL;

	return egress_ipv4_tun_info(dev, skb, tun_info, src_port, dst_port);
}

static const struct net_device_ops vxlan_netdev_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
Expand All @@ -2351,6 +2391,7 @@ static const struct net_device_ops vxlan_netdev_ops = {
.ndo_fdb_add = vxlan_fdb_add,
.ndo_fdb_del = vxlan_fdb_delete,
.ndo_fdb_dump = vxlan_fdb_dump,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};

/* Info for udev, that this is a virtual tunnel endpoint */
Expand Down
7 changes: 7 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1054,6 +1054,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
* This function is used to pass protocol port error state information
* to the switch driver. The switch driver can react to the proto_down
* by doing a phys down on the associated switch port.
* int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
* This function is used to get egress tunnel information for given skb.
* This is useful for retrieving outer tunnel header parameters while
* sampling packet.
*
*/
struct net_device_ops {
Expand Down Expand Up @@ -1227,6 +1231,8 @@ struct net_device_ops {
int (*ndo_get_iflink)(const struct net_device *dev);
int (*ndo_change_proto_down)(struct net_device *dev,
bool proto_down);
int (*ndo_fill_metadata_dst)(struct net_device *dev,
struct sk_buff *skb);
};

/**
Expand Down Expand Up @@ -2203,6 +2209,7 @@ void dev_add_offload(struct packet_offload *po);
void dev_remove_offload(struct packet_offload *po);

int dev_get_iflink(const struct net_device *dev);
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
unsigned short mask);
struct net_device *dev_get_by_name(struct net *net, const char *name);
Expand Down
32 changes: 32 additions & 0 deletions include/net/dst_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,38 @@ static inline struct metadata_dst *tun_rx_dst(int md_size)
return tun_dst;
}

/*
 * Give @skb its own copy of the tunnel metadata dst it currently carries.
 *
 * A new metadata_dst sized for the existing tunnel options is allocated
 * with GFP_ATOMIC, the tunnel info (including the trailing options) is
 * copied into it, the skb's current dst is dropped and the copy is
 * attached to the skb in its place.
 *
 * Returns the new metadata_dst, ERR_PTR(-EINVAL) when the skb carries no
 * metadata dst, or ERR_PTR(-ENOMEM) when the allocation fails.
 */
static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
{
	struct metadata_dst *md_dst = skb_metadata_dst(skb);
	struct metadata_dst *new_md;
	int md_size;

	if (!md_dst)
		return ERR_PTR(-EINVAL);

	/* Read the options length only after md_dst is known to be valid. */
	md_size = md_dst->u.tun_info.options_len;

	new_md = metadata_dst_alloc(md_size, GFP_ATOMIC);
	if (!new_md)
		return ERR_PTR(-ENOMEM);

	memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
	       sizeof(struct ip_tunnel_info) + md_size);
	skb_dst_drop(skb);
	/* A reference is taken on the copy before handing it to the skb. */
	dst_hold(&new_md->dst);
	skb_dst_set(skb, &new_md->dst);
	return new_md;
}

/*
 * Unclone the skb's tunnel metadata dst via tun_dst_unclone() and return
 * a pointer to the tunnel info inside the new copy.  Any failure of the
 * unclone step is reported as NULL.
 */
static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb)
{
	struct metadata_dst *copy = tun_dst_unclone(skb);

	return IS_ERR(copy) ? NULL : &copy->u.tun_info;
}

static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
__be16 flags,
__be64 tunnel_id,
Expand Down
27 changes: 27 additions & 0 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
Expand Down Expand Up @@ -681,6 +682,32 @@ int dev_get_iflink(const struct net_device *dev)
}
EXPORT_SYMBOL(dev_get_iflink);

/**
* dev_fill_metadata_dst - Retrieve tunnel egress information.
* @dev: targeted interface
* @skb: The packet.
*
* For better visibility of tunnel traffic OVS needs to retrieve
* egress tunnel information for a packet. Following API allows
* user to get this info.
*/
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info;

if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
return -EINVAL;

info = skb_tunnel_info_unclone(skb);
if (!info)
return -ENOMEM;
if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
return -EINVAL;

return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
}
EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);

/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
Expand Down
46 changes: 37 additions & 9 deletions net/ipv4/ip_gre.c
Original file line number Diff line number Diff line change
Expand Up @@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}

/*
 * Build an IPv4 GRE flow from the tunnel key and look up its route.
 *
 * @fl is zeroed and then filled with the key's source and destination
 * addresses, its TOS and the skb's mark; the lookup runs in the network
 * namespace of @dev.  The result of ip_route_output_key() is returned
 * unchanged, so callers must check it with IS_ERR().
 */
static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	memset(fl, 0, sizeof(*fl));
	fl->flowi4_proto = IPPROTO_GRE;
	fl->flowi4_mark = skb->mark;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->saddr = key->u.ipv4.src;
	fl->daddr = key->u.ipv4.dst;

	return ip_route_output_key(dev_net(dev), fl);
}

static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel_info *tun_info;
struct net *net = dev_net(dev);
const struct ip_tunnel_key *key;
struct flowi4 fl;
struct rtable *rt;
Expand All @@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;

key = &tun_info->key;
memset(&fl, 0, sizeof(fl));
fl.daddr = key->u.ipv4.dst;
fl.saddr = key->u.ipv4.src;
fl.flowi4_tos = RT_TOS(key->tos);
fl.flowi4_mark = skb->mark;
fl.flowi4_proto = IPPROTO_GRE;

rt = ip_route_output_key(net, &fl);
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;

Expand Down Expand Up @@ -566,6 +575,24 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_dropped++;
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
struct rtable *rt;
struct flowi4 fl4;

if (ip_tunnel_info_af(info) != AF_INET)
return -EINVAL;

rt = gre_get_rt(skb, dev, &fl4, &info->key);
if (IS_ERR(rt))
return PTR_ERR(rt);

ip_rt_put(rt);
info->key.u.ipv4.src = fl4.saddr;
return 0;
}

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct net_device *dev)
{
Expand Down Expand Up @@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
.ndo_change_mtu = ip_tunnel_change_mtu,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_fill_metadata_dst = gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)
Expand Down
9 changes: 3 additions & 6 deletions net/openvswitch/actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
Expand Down Expand Up @@ -796,11 +795,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
if (vport) {
int err;

upcall.egress_tun_info = &info;
err = ovs_vport_get_egress_tun_info(vport, skb,
&upcall);
if (err)
upcall.egress_tun_info = NULL;
err = dev_fill_metadata_dst(vport->dev, skb);
if (!err)
upcall.egress_tun_info = skb_tunnel_info(skb);
}

break;
Expand Down
5 changes: 2 additions & 3 deletions net/openvswitch/datapath.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,9 +490,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,

if (upcall_info->egress_tun_info) {
nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
err = ovs_nla_put_egress_tunnel_key(user_skb,
upcall_info->egress_tun_info,
upcall_info->egress_tun_opts);
err = ovs_nla_put_tunnel_info(user_skb,
upcall_info->egress_tun_info);
BUG_ON(err);
nla_nest_end(user_skb, nla);
}
Expand Down
1 change: 0 additions & 1 deletion net/openvswitch/datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ struct ovs_skb_cb {
*/
struct dp_upcall_info {
struct ip_tunnel_info *egress_tun_info;
const void *egress_tun_opts;
const struct nlattr *userdata;
const struct nlattr *actions;
int actions_len;
Expand Down
Loading

0 comments on commit fc4099f

Please sign in to comment.