Skip to content

Commit

Permalink
net/ipv6: Revert attempt to simplify route replace and append
Browse files Browse the repository at this point in the history
NetworkManager likes to manage linklocal prefix routes and does so with
the NLM_F_APPEND flag, breaking attempts to simplify the IPv6 route
code and by extension enable multipath routes with device only nexthops.

Revert f34436a and these followup patches:
6eba08c ("ipv6: Only emit append events for appended routes").
ce45bde ("mlxsw: spectrum_router: Align with new route replace logic")
53b562d ("mlxsw: spectrum_router: Allow appending to dev-only routes")

Update the fib_tests cases to reflect the old behavior.

Fixes: f34436a ("net/ipv6: Simplify route replace and appending into multipath route")
Signed-off-by: David Ahern <dsahern@gmail.com>
  • Loading branch information
David Ahern authored and David S. Miller committed Jul 4, 2018
1 parent d5a672a commit 33bd5ac
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 137 deletions.
48 changes: 24 additions & 24 deletions drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
Original file line number Diff line number Diff line change
Expand Up @@ -4756,6 +4756,12 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
kfree(mlxsw_sp_rt6);
}

static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY;
}

static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
Expand All @@ -4765,11 +4771,11 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)

static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
const struct fib6_info *nrt, bool append)
const struct fib6_info *nrt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry;

if (!append)
if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
return NULL;

list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
Expand All @@ -4784,7 +4790,8 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
break;
if (rt->fib6_metric < nrt->fib6_metric)
continue;
if (rt->fib6_metric == nrt->fib6_metric)
if (rt->fib6_metric == nrt->fib6_metric &&
mlxsw_sp_fib6_rt_can_mp(rt))
return fib6_entry;
if (rt->fib6_metric > nrt->fib6_metric)
break;
Expand Down Expand Up @@ -5163,7 +5170,7 @@ static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
const struct fib6_info *nrt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;

list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
Expand All @@ -5172,13 +5179,18 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
continue;
if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
break;
if (replace && rt->fib6_metric == nrt->fib6_metric)
return fib6_entry;
if (replace && rt->fib6_metric == nrt->fib6_metric) {
if (mlxsw_sp_fib6_rt_can_mp(rt) ==
mlxsw_sp_fib6_rt_can_mp(nrt))
return fib6_entry;
if (mlxsw_sp_fib6_rt_can_mp(nrt))
fallback = fallback ?: fib6_entry;
}
if (rt->fib6_metric > nrt->fib6_metric)
return fib6_entry;
return fallback ?: fib6_entry;
}

return NULL;
return fallback;
}

static int
Expand Down Expand Up @@ -5304,8 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
}

static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
struct fib6_info *rt, bool replace,
bool append)
struct fib6_info *rt, bool replace)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
Expand All @@ -5331,22 +5342,14 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
/* Before creating a new entry, try to append route to an existing
* multipath entry.
*/
fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, append);
fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
if (fib6_entry) {
err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
if (err)
goto err_fib6_entry_nexthop_add;
return 0;
}

/* We received an append event, yet did not find any route to
* append to.
*/
if (WARN_ON(append)) {
err = -EINVAL;
goto err_fib6_entry_append;
}

fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
if (IS_ERR(fib6_entry)) {
err = PTR_ERR(fib6_entry);
Expand All @@ -5364,7 +5367,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
err_fib6_node_entry_link:
mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
err_fib6_entry_append:
err_fib6_entry_nexthop_add:
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
return err;
Expand Down Expand Up @@ -5715,7 +5717,7 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
bool replace, append;
bool replace;
int err;

rtnl_lock();
Expand All @@ -5726,10 +5728,8 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_ADD:
replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
err = mlxsw_sp_router_fib6_add(mlxsw_sp,
fib_work->fen6_info.rt, replace,
append);
fib_work->fen6_info.rt, replace);
if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp);
mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
Expand Down
6 changes: 6 additions & 0 deletions include/net/ip6_route.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}

static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
RTF_GATEWAY;
}

void ip6_route_input(struct sk_buff *skb);
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
Expand Down
156 changes: 86 additions & 70 deletions net/ipv6/ip6_fib.c
Original file line number Diff line number Diff line change
Expand Up @@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
{
struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&rt->fib6_table->tb6_lock));
enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
struct fib6_info *iter = NULL, *match = NULL;
struct fib6_info *iter = NULL;
struct fib6_info __rcu **ins;
struct fib6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
int append = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_APPEND));
int add = (!info->nlh ||
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
u16 nlflags = NLM_F_EXCL;
int err;

if (append)
if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
nlflags |= NLM_F_APPEND;

ins = &fn->leaf;
Expand All @@ -970,8 +969,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,

nlflags &= ~NLM_F_EXCL;
if (replace) {
found++;
break;
if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
found++;
break;
}
if (rt_can_ecmp)
fallback_ins = fallback_ins ?: ins;
goto next_iter;
}

if (rt6_duplicate_nexthop(iter, rt)) {
Expand All @@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
return -EEXIST;
}

/* first route that matches */
if (!match)
match = iter;
/* If we have the same destination and the same metric,
* but not the same gateway, then the route we try to
* add is sibling to this route, increment our counter
* of siblings, and later we will add our route to the
* list.
* Only static routes (which don't have flag
* RTF_EXPIRES) are used for ECMPv6.
*
* To avoid long list, we only had siblings if the
* route have a gateway.
*/
if (rt_can_ecmp &&
rt6_qualify_for_ecmp(iter))
rt->fib6_nsiblings++;
}

if (iter->fib6_metric > rt->fib6_metric)
break;

next_iter:
ins = &iter->fib6_next;
}

if (fallback_ins && !found) {
/* No ECMP-able route found, replace first non-ECMP one */
ins = fallback_ins;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
found++;
}

/* Reset round-robin state, if necessary */
if (ins == &fn->leaf)
fn->rr_ptr = NULL;

/* Link this route to others same route. */
if (append && match) {
if (rt->fib6_nsiblings) {
unsigned int fib6_nsiblings;
struct fib6_info *sibling, *temp_sibling;

if (rt->fib6_flags & RTF_REJECT) {
NL_SET_ERR_MSG(extack,
"Can not append a REJECT route");
return -EINVAL;
} else if (match->fib6_flags & RTF_REJECT) {
NL_SET_ERR_MSG(extack,
"Can not append to a REJECT route");
return -EINVAL;
/* Find the first route that have the same metric */
sibling = leaf;
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
list_add_tail(&rt->fib6_siblings,
&sibling->fib6_siblings);
break;
}
sibling = rcu_dereference_protected(sibling->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
}
event = FIB_EVENT_ENTRY_APPEND;
rt->fib6_nsiblings = match->fib6_nsiblings;
list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
match->fib6_nsiblings++;

/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
* is broken!
*/
fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
&match->fib6_siblings, fib6_siblings) {
&rt->fib6_siblings, fib6_siblings) {
sibling->fib6_nsiblings++;
BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
fib6_nsiblings++;
}

rt6_multipath_rebalance(match);
BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
rt6_multipath_rebalance(temp_sibling);
}

/*
Expand All @@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add:
nlflags |= NLM_F_CREATE;

err = call_fib6_entry_notifiers(info->nl_net, event, rt,
extack);
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_ADD,
rt, extack);
if (err)
return err;

Expand All @@ -1062,7 +1087,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
}

} else {
struct fib6_info *tmp;
int nsiblings;

if (!found) {
if (add)
Expand All @@ -1077,57 +1102,48 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (err)
return err;

/* if route being replaced has siblings, set tmp to
* last one, otherwise tmp is current route. this is
* used to set fib6_next for new route
*/
if (iter->fib6_nsiblings)
tmp = list_last_entry(&iter->fib6_siblings,
struct fib6_info,
fib6_siblings);
else
tmp = iter;

/* insert new route */
atomic_inc(&rt->fib6_ref);
rcu_assign_pointer(rt->fib6_node, fn);
rt->fib6_next = tmp->fib6_next;
rt->fib6_next = iter->fib6_next;
rcu_assign_pointer(*ins, rt);

if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
nsiblings = iter->fib6_nsiblings;
iter->fib6_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
fib6_info_release(iter);

/* delete old route */
rt = iter;

if (rt->fib6_nsiblings) {
struct fib6_info *tmp;

if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
fib6_siblings) {
iter->fib6_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
fib6_info_release(iter);

rt->fib6_nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
ins = &rt->fib6_next;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
while (iter) {
if (iter->fib6_metric > rt->fib6_metric)
break;
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->fib6_next;
iter->fib6_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
fib6_info_release(iter);
nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
ins = &iter->fib6_next;
}
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->fib6_table->tb6_lock));
}
WARN_ON(nsiblings != 0);
}

WARN_ON(rt->fib6_nsiblings != 0);

rt->fib6_node = NULL;
fib6_purge_rt(rt, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == rt)
fn->rr_ptr = NULL;
fib6_info_release(rt);
}

return 0;
Expand Down
Loading

0 comments on commit 33bd5ac

Please sign in to comment.