Skip to content

Commit

Permalink
Merge branch 'Support-for-fdb-ECMP-nexthop-groups'
Browse files Browse the repository at this point in the history
Roopa Prabhu says:

====================
Support for fdb ECMP nexthop groups

This series introduces ecmp nexthops and nexthop groups
for mac fdb entries. In subsequent patches this is used
by the vxlan driver fdb entries. The use case is
E-VPN multihoming [1,2,3] which requires bridged vxlan traffic
to be load balanced to remote switches (vteps) belonging to
the same multi-homed ethernet segment (This is analogous to
a multi-homed LAG but over vxlan).

Changes include new nexthop flag NHA_FDB for nexthops
referenced by fdb entries. These nexthops only have ip.
The patches make sure that routes dont reference such nexthops.

example:
$ip nexthop add id 12 via 172.16.1.2 fdb
$ip nexthop add id 13 via 172.16.1.3 fdb
$ip nexthop add id 102 group 12/13 fdb

$bridge fdb add 02:02:00:00:00:13 dev vxlan1000 nhid 101 self

[1] E-VPN https://tools.ietf.org/html/rfc7432
[2] E-VPN VxLAN: https://tools.ietf.org/html/rfc8365
[3] LPC talk with mention of nexthop groups for L2 ecmp
http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf

v4 -
    - fix error path free_skb in vxlan_xmit_nh
    - fix atomic notifier initialization issue
      (Reported-by: kernel test robot <rong.a.chen@intel.com>)
      The reported error was easy to locate and fix, but i was not
      able to re-test with the robot reproducer script due to some
      other issues with running the script on my test system.

v3 - fix wording in selftest print as pointed out by davidA

v2 -
	- dropped nikolays fixes for nexthop multipath null pointer deref
	  (he will send those separately)
	- added negative tests for route add with fdb nexthop + a few more
	- Fixes for a few  fdb replace conditions found during more testing
	- Moved to rcu_dereference_rtnl in vxlan_fdb_info and consolidate rcu
	  dereferences
	- Fixes to build failures Reported-by: kbuild test robot <lkp@intel.com>
	- DavidA, I am going to send a separate patch for the neighbor code validation
	  for NDA_NH_ID if thats ok.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 22, 2020
2 parents 7b1b843 + 0534c54 commit 4001f1f
Show file tree
Hide file tree
Showing 11 changed files with 650 additions and 90 deletions.
339 changes: 276 additions & 63 deletions drivers/net/vxlan.c

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions include/net/ip6_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ struct fib6_config {
struct nl_info fc_nlinfo;
struct nlattr *fc_encap;
u16 fc_encap_type;
bool fc_is_fdb;
};

struct fib6_node {
Expand Down
1 change: 1 addition & 0 deletions include/net/netns/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ struct netns_nexthop {

unsigned int seq; /* protected by rtnl_mutex */
u32 last_id_allocated;
struct atomic_notifier_head notifier_chain;
};
#endif
44 changes: 44 additions & 0 deletions include/net/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#define __LINUX_NEXTHOP_H

#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/route.h>
#include <linux/types.h>
#include <net/ip_fib.h>
Expand All @@ -26,6 +27,7 @@ struct nh_config {
u8 nh_family;
u8 nh_protocol;
u8 nh_blackhole;
u8 nh_fdb;
u32 nh_flags;

int nh_ifindex;
Expand All @@ -52,6 +54,7 @@ struct nh_info {

u8 family;
bool reject_nh;
bool fdb_nh;

union {
struct fib_nh_common fib_nhc;
Expand Down Expand Up @@ -80,6 +83,7 @@ struct nexthop {
struct rb_node rb_node; /* entry on netns rbtree */
struct list_head fi_list; /* v4 entries using nh */
struct list_head f6i_list; /* v6 entries using nh */
struct list_head fdb_list; /* fdb entries using this nh */
struct list_head grp_list; /* nh group entries using this nh */
struct net *net;

Expand All @@ -88,6 +92,7 @@ struct nexthop {
u8 protocol; /* app managing this nh */
u8 nh_flags;
bool is_group;
bool is_fdb_nh;

refcount_t refcnt;
struct rcu_head rcu;
Expand All @@ -98,6 +103,17 @@ struct nexthop {
};
};

enum nexthop_event_type {
NEXTHOP_EVENT_ADD,
NEXTHOP_EVENT_DEL
};

int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
enum nexthop_event_type event_type,
struct nexthop *nh);
int register_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);

/* caller is holding rcu or rtnl; no reference taken to nexthop */
struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
void nexthop_free_rcu(struct rcu_head *head);
Expand Down Expand Up @@ -304,4 +320,32 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
int nexthop_for_each_fib6_nh(struct nexthop *nh,
int (*cb)(struct fib6_nh *nh, void *arg),
void *arg);

static inline int nexthop_get_family(struct nexthop *nh)
{
struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

return nhi->family;
}

static inline
struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
{
struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);

return &nhi->fib_nhc;
}

static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
int hash)
{
struct nh_info *nhi;
struct nexthop *nhp;

nhp = nexthop_select_path(nh, hash);
if (unlikely(!nhp))
return NULL;
nhi = rcu_dereference(nhp->nh_info);
return &nhi->fib_nhc;
}
#endif
25 changes: 25 additions & 0 deletions include/net/vxlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <net/dst_metadata.h>
#include <net/rtnetlink.h>
#include <net/switchdev.h>
#include <net/nexthop.h>

#define IANA_VXLAN_UDP_PORT 4789

Expand Down Expand Up @@ -487,4 +488,28 @@ static inline void vxlan_flag_attr_error(int attrtype,
#undef VXLAN_FLAG
}

static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
int hash,
struct vxlan_rdst *rdst)
{
struct fib_nh_common *nhc;

nhc = nexthop_path_fdb_result(nh, hash);
if (unlikely(!nhc))
return false;

switch (nhc->nhc_gw_family) {
case AF_INET:
rdst->remote_ip.sin.sin_addr.s_addr = nhc->nhc_gw.ipv4;
rdst->remote_ip.sa.sa_family = AF_INET;
break;
case AF_INET6:
rdst->remote_ip.sin6.sin6_addr = nhc->nhc_gw.ipv6;
rdst->remote_ip.sa.sa_family = AF_INET6;
break;
}

return true;
}

#endif
1 change: 1 addition & 0 deletions include/uapi/linux/neighbour.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ enum {
NDA_LINK_NETNSID,
NDA_SRC_VNI,
NDA_PROTOCOL, /* Originator of entry */
NDA_NH_ID,
__NDA_MAX
};

Expand Down
3 changes: 3 additions & 0 deletions include/uapi/linux/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ enum {
NHA_GROUPS, /* flag; only return nexthop groups in dump */
NHA_MASTER, /* u32; only return nexthops with given master dev */

NHA_FDB, /* flag; nexthop belongs to a bridge fdb */
/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */

__NHA_MAX,
};

Expand Down
2 changes: 2 additions & 0 deletions net/core/neighbour.c
Original file line number Diff line number Diff line change
Expand Up @@ -1771,6 +1771,7 @@ static struct neigh_table *neigh_find_table(int family)
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_UNSPEC] = { .strict_start_type = NDA_NH_ID },
[NDA_DST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[NDA_LLADDR] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
[NDA_CACHEINFO] = { .len = sizeof(struct nda_cacheinfo) },
Expand All @@ -1781,6 +1782,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
[NDA_IFINDEX] = { .type = NLA_U32 },
[NDA_MASTER] = { .type = NLA_U32 },
[NDA_PROTOCOL] = { .type = NLA_U8 },
[NDA_NH_ID] = { .type = NLA_U32 },
};

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
Expand Down
Loading

0 comments on commit 4001f1f

Please sign in to comment.