Skip to content

Commit

Permalink
Merge branch 'ipvlan-l3'
Browse files Browse the repository at this point in the history
Mahesh Bandewar says:

====================
IPvlan introduce l3s mode

Same old problem with a new approach, this time incorporating the
suggestions made on the earlier patch series.

First thing is that this is introduced as a new mode rather than
modifying the old (L3) mode. So the behavior of the existing modes is
preserved as it is and the new L3s mode obeys iptables so that intended
conn-tracking can work.

To do this, the code uses the newly added l3mdev_rcv() handler and an
iptables hook: l3mdev_rcv() performs an inbound route lookup with the
correct (IPvlan slave) interface, and the iptables hook at LOCAL_INPUT
then changes the input device from the master to the slave to complete
the formality.

The supporting stack changes are trivial: a symbol is exported so that
the IPv6 equivalent of the already-exported IPv4 code is available, and
the netfilter hook registration code now allows the caller to hold RTNL.
Please look into the individual patches for details.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Sep 19, 2016
2 parents a5ea31f + 4fbae7d commit 8ddda65
Show file tree
Hide file tree
Showing 10 changed files with 243 additions and 16 deletions.
7 changes: 6 additions & 1 deletion Documentation/networking/ipvlan.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
There are no module parameters for this driver and it can be configured
using IProute2/ip utility.

ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 }
ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }

e.g. ip link add link eth0 ipvl0 type ipvlan mode l2

Expand All @@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be
used before packets are queued on the outbound device. In this mode the slaves
will not receive nor can send multicast / broadcast traffic.

4.3 L3S mode:
This is very similar to the L3 mode except that iptables (conn-tracking)
works in this mode, and hence it is L3-symmetric (L3s). This mode has slightly
lower performance, but that shouldn't matter since you are choosing it over
plain-L3 mode precisely to make conn-tracking work.

5. What to choose (macvlan vs. ipvlan)?
These two devices are very similar in many regards and the specific use
Expand Down
1 change: 1 addition & 0 deletions drivers/net/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ config IPVLAN
tristate "IP-VLAN support"
depends on INET
depends on IPV6
depends on NET_L3_MASTER_DEV
---help---
This allows one to create virtual devices off of a main interface
and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
Expand Down
6 changes: 6 additions & 0 deletions drivers/net/ipvlan/ipvlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <net/ip.h>
#include <net/ip6_route.h>
#include <net/rtnetlink.h>
#include <net/route.h>
#include <net/addrconf.h>
#include <net/l3mdev.h>

#define IPVLAN_DRV "ipvlan"
#define IPV_DRV_VER "0.1"
Expand Down Expand Up @@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
const void *iaddr, bool is_v6);
bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
void ipvlan_ht_addr_del(struct ipvl_addr *addr);
/* L3S receive handler; ipvlan_main.c installs it as the l3mdev_l3_rcv callback. */
struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
u16 proto);
/* Netfilter hook function; ipvlan_main.c registers it at NF_INET_LOCAL_IN for IPv4 and IPv6. */
unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
#endif /* __IPVLAN_H */
94 changes: 94 additions & 0 deletions drivers/net/ipvlan/ipvlan_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev)
case IPVLAN_MODE_L2:
return ipvlan_xmit_mode_l2(skb, dev);
case IPVLAN_MODE_L3:
case IPVLAN_MODE_L3S:
return ipvlan_xmit_mode_l3(skb, dev);
}

Expand Down Expand Up @@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
return ipvlan_handle_mode_l2(pskb, port);
case IPVLAN_MODE_L3:
return ipvlan_handle_mode_l3(pskb, port);
case IPVLAN_MODE_L3S:
return RX_HANDLER_PASS;
}

/* Should not reach here */
Expand All @@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb)
kfree_skb(skb);
return RX_HANDLER_CONSUMED;
}

/*
 * Map an skb seen on @dev to the ipvlan address entry it resolves to.
 *
 * Returns NULL unless @dev is an ipvlan port device whose port is in L3S
 * mode, an L3 header can be extracted from @skb, and the address lookup
 * on that port succeeds.
 */
static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct ipvl_port *port;
	int addr_type;
	void *lyr3h;

	if (!dev || !netif_is_ipvlan_port(dev))
		return NULL;

	/* Only ports running in L3S mode are handled on this path. */
	port = ipvlan_port_get_rcu(dev);
	if (!port || port->mode != IPVLAN_MODE_L3S)
		return NULL;

	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
	if (!lyr3h)
		return NULL;

	return ipvlan_addr_lookup(port, lyr3h, addr_type, true);
}

/*
 * l3mdev receive callback used in L3S mode.
 *
 * When @skb resolves to an ipvlan address on @dev (see
 * ipvlan_skb_to_addr()), the input route lookup is redone with the
 * device that owns that address as the ingress interface:
 *   - AF_INET:  ip_route_input_noref() is called on the skb;
 *   - AF_INET6: the current dst is dropped and replaced by the result of
 *               ip6_route_input_lookup().
 * Any other @proto leaves the skb untouched.
 *
 * Always returns @skb, including when no address matches or the IPv4
 * route lookup reports an error.
 */
struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
			      u16 proto)
{
	struct ipvl_addr *addr = ipvlan_skb_to_addr(skb, dev);
	struct net_device *sdev;

	if (!addr)
		return skb;

	sdev = addr->master->dev;

	if (proto == AF_INET) {
		struct iphdr *ip4h = ip_hdr(skb);
		int err;

		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
					   ip4h->tos, sdev);
		if (unlikely(err))
			return skb;
	} else if (proto == AF_INET6) {
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		int lookup_flags = RT6_LOOKUP_F_HAS_SADDR;
		struct dst_entry *new_dst;
		struct flowi6 fl6 = {
			.flowi6_iif   = sdev->ifindex,
			.daddr        = ip6h->daddr,
			.saddr        = ip6h->saddr,
			.flowlabel    = ip6_flowinfo(ip6h),
			.flowi6_mark  = skb->mark,
			.flowi6_proto = ip6h->nexthdr,
		};

		/* Replace whatever dst the skb carried with the new lookup. */
		skb_dst_drop(skb);
		new_dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6,
						 lookup_flags);
		skb_dst_set(skb, new_dst);
	}

	return skb;
}

/*
 * Netfilter hook function for L3S mode.
 *
 * If @skb resolves to an ipvlan address on its current device, switch
 * skb->dev to the device owning that address and account the packet as
 * received there (skb->len plus ETH_HLEN bytes).  The verdict is always
 * NF_ACCEPT; @priv and @state are not used.
 */
unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
			     const struct nf_hook_state *state)
{
	struct ipvl_addr *addr = ipvlan_skb_to_addr(skb, skb->dev);

	if (addr) {
		unsigned int len = skb->len + ETH_HLEN;

		skb->dev = addr->master->dev;
		ipvlan_count_rx(addr->master, len, true, false);
	}

	return NF_ACCEPT;
}
87 changes: 80 additions & 7 deletions drivers/net/ipvlan/ipvlan_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,87 @@

#include "ipvlan.h"

/*
 * Number of users of the shared netfilter hooks below; the hooks are
 * registered on the 0 -> 1 transition and removed on the 1 -> 0 one.
 * Plain (non-atomic) counter: ipvlan_set_port_mode() asserts RTNL before
 * touching it; presumably every other caller holds RTNL too - confirm.
 */
static u32 ipvl_nf_hook_refcnt;

/*
 * Netfilter hooks needed by L3S mode: the same handler is attached to the
 * LOCAL_IN hook of both IPv4 and IPv6.
 * NOTE(review): INT_MAX priority presumably makes this the last hook to
 * run at LOCAL_IN - confirm against netfilter's priority ordering.
 */
static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
	{
		.pf	  = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_IN,
		.priority = INT_MAX,
		.hook	  = ipvlan_nf_input,
	},
	{
		.pf	  = NFPROTO_IPV6,
		.hooknum  = NF_INET_LOCAL_IN,
		.priority = INT_MAX,
		.hook	  = ipvlan_nf_input,
	},
};

/* l3mdev callbacks; ipvlan_set_port_mode() points the master device at this table when the port enters L3S mode. */
static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
.l3mdev_l3_rcv = ipvlan_l3_rcv,
};

/* Set the ipvlan device's MTU to @dev's MTU reduced by the per-device mtu_adj. */
static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
{
	struct net_device *slave = ipvlan->dev;

	slave->mtu = dev->mtu - ipvlan->mtu_adj;
}

static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
/*
 * Take a reference on the shared L3S netfilter hooks.
 *
 * The first user actually registers ipvl_nfops; later users only bump
 * the count.  Returns 0 on success or the error from
 * _nf_register_hooks(), in which case the count stays at zero.
 */
static int ipvlan_register_nf_hook(void)
{
	int err;

	/* Hooks already installed: just account for one more user. */
	if (ipvl_nf_hook_refcnt) {
		ipvl_nf_hook_refcnt++;
		return 0;
	}

	err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
	if (err)
		return err;

	ipvl_nf_hook_refcnt = 1;
	return 0;
}

/*
 * Drop a reference on the shared L3S netfilter hooks and unregister them
 * when the last user goes away.
 *
 * An unbalanced call (count already zero) only triggers a warning.  The
 * counter is deliberately left untouched in that case: decrementing an
 * unsigned zero would wrap it to a huge value, after which
 * ipvlan_register_nf_hook() would keep "sharing" hooks that are not
 * actually registered.
 */
static void ipvlan_unregister_nf_hook(void)
{
	if (WARN_ON(!ipvl_nf_hook_refcnt))
		return;

	ipvl_nf_hook_refcnt--;
	if (!ipvl_nf_hook_refcnt)
		_nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
}

/*
 * Switch @port to mode @nval.  Must be called with RTNL held.
 *
 * Entering L3S takes a reference on the shared netfilter hooks and marks
 * the master device as an l3mdev master with the ipvlan l3mdev ops;
 * leaving L3S undoes both.  Every ipvlan device already on the port gets
 * IFF_NOARP set for the L3 and L3S modes and cleared otherwise.
 *
 * Returns 0 on success (including when the mode is unchanged) or the
 * error from ipvlan_register_nf_hook(), in which case nothing is changed.
 */
static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
{
	struct ipvl_dev *ipvlan;
	struct net_device *mdev = port->dev;
	int err = 0;

	ASSERT_RTNL();
	if (port->mode != nval) {
		if (nval == IPVLAN_MODE_L3S) {
			/* New mode is L3S */
			err = ipvlan_register_nf_hook();
			if (err)
				return err;
			mdev->l3mdev_ops = &ipvl_l3mdev_ops;
			mdev->priv_flags |= IFF_L3MDEV_MASTER;
		} else if (port->mode == IPVLAN_MODE_L3S) {
			/* Old mode was L3S */
			mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
			ipvlan_unregister_nf_hook();
			mdev->l3mdev_ops = NULL;
		}
		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
			/*
			 * A single test covering both L3 flavours: a leftover
			 * outer "nval == IPVLAN_MODE_L3" check here would
			 * capture the else branch, so L3S would never get
			 * IFF_NOARP and L2 would never clear it.
			 */
			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
				ipvlan->dev->flags |= IFF_NOARP;
			else
				ipvlan->dev->flags &= ~IFF_NOARP;
		}
		port->mode = nval;
	}
	return err;
}

static int ipvlan_port_create(struct net_device *dev)
Expand Down Expand Up @@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev)
struct ipvl_port *port = ipvlan_port_get_rtnl(dev);

dev->priv_flags &= ~IFF_IPVLAN_MASTER;
if (port->mode == IPVLAN_MODE_L3S) {
dev->priv_flags &= ~IFF_L3MDEV_MASTER;
ipvlan_unregister_nf_hook();
dev->l3mdev_ops = NULL;
}
netdev_rx_handler_unregister(dev);
cancel_work_sync(&port->wq);
__skb_queue_purge(&port->backlog);
Expand Down Expand Up @@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev)
struct net_device *phy_dev = ipvlan->phy_dev;
struct ipvl_addr *addr;

if (ipvlan->port->mode == IPVLAN_MODE_L3)
if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
ipvlan->port->mode == IPVLAN_MODE_L3S)
dev->flags |= IFF_NOARP;
else
dev->flags &= ~IFF_NOARP;
Expand Down Expand Up @@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev,
{
struct ipvl_dev *ipvlan = netdev_priv(dev);
struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
int err = 0;

if (data && data[IFLA_IPVLAN_MODE]) {
u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);

ipvlan_set_port_mode(port, nmode);
err = ipvlan_set_port_mode(port, nmode);
}
return 0;
return err;
}

static size_t ipvlan_nl_getsize(const struct net_device *dev)
Expand Down Expand Up @@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
unregister_netdevice(dev);
return err;
}
err = ipvlan_set_port_mode(port, mode);
if (err) {
unregister_netdevice(dev);
return err;
}

list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
ipvlan_set_port_mode(port, mode);

netif_stacked_transfer_operstate(phy_dev, dev);
return 0;
}
Expand Down
2 changes: 2 additions & 0 deletions include/linux/netfilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ int nf_register_hook(struct nf_hook_ops *reg);
void nf_unregister_hook(struct nf_hook_ops *reg);
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
/*
 * NOTE(review): per the merge description these variants exist so that a
 * caller which already holds RTNL can (un)register hooks - confirm the
 * exact locking contract against the netfilter core.
 */
int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);

/* Functions to register get/setsockopt ranges (non-inclusive). You
need to check permissions yourself! */
Expand Down
3 changes: 3 additions & 0 deletions include/net/ip6_route.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
}

void ip6_route_input(struct sk_buff *skb);
/* Input route lookup for the flow described by @fl6; defined and exported in net/ipv6/route.c. */
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
struct flowi6 *fl6, int flags);

struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
struct flowi6 *fl6, int flags);
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/if_link.h
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,7 @@ enum {
/* Operating modes of an ipvlan port, as carried in the IFLA_IPVLAN_MODE attribute. */
enum ipvlan_mode {
IPVLAN_MODE_L2 = 0,
IPVLAN_MODE_L3,
IPVLAN_MODE_L3S, /* L3-symmetric: like L3, but iptables (conn-tracking) works */
IPVLAN_MODE_MAX /* presumably a sentinel/count rather than a selectable mode - confirm */
};

Expand Down
7 changes: 4 additions & 3 deletions net/ipv6/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1147,15 +1147,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *
return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
struct flowi6 *fl6, int flags)
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
struct flowi6 *fl6, int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;

return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);

void ip6_route_input(struct sk_buff *skb)
{
Expand Down
Loading

0 comments on commit 8ddda65

Please sign in to comment.