From 4d0ab3a6885e3e9040310a8d8f54503366083626 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Apr 2025 14:42:23 +0300 Subject: [PATCH 1/2] ipv6: Start path selection from the first nexthop Cited commit transitioned IPv6 path selection to use hash-threshold instead of modulo-N. With hash-threshold, each nexthop is assigned a region boundary in the multipath hash function's output space and a nexthop is chosen if the calculated hash is smaller than the nexthop's region boundary. Hash-threshold does not work correctly if path selection does not start with the first nexthop. For example, if fib6_select_path() is always passed the last nexthop in the group, then it will always be chosen because its region boundary covers the entire hash function's output space. Fix this by starting the selection process from the first nexthop and do not consider nexthops for which rt6_score_route() provided a negative score. Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N") Reported-by: Stanislav Fomichev Closes: https://lore.kernel.org/netdev/Z9RIyKZDNoka53EO@mini-arch/ Signed-off-by: Ido Schimmel Link: https://patch.msgid.link/20250402114224.293392-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3406a0d45bd..864f0002034b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -412,11 +412,35 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } +static struct fib6_info * +rt6_multipath_first_sibling_rcu(const struct fib6_info *rt) +{ + struct fib6_info *iter; + struct fib6_node *fn; + + fn = rcu_dereference(rt->fib6_node); + if (!fn) + goto out; + iter = rcu_dereference(fn->leaf); + if (!iter) + goto out; + + while (iter) { + if (iter->fib6_metric == rt->fib6_metric && + rt6_qualify_for_ecmp(iter)) + return iter; + iter = rcu_dereference(iter->fib6_next); + } + +out: + return NULL; +} + void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *match = res->f6i; + struct fib6_info *first, *match = res->f6i; struct fib6_info *sibling; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) @@ -440,10 +464,18 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, return; } - if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) + first = rt6_multipath_first_sibling_rcu(match); + if (!first) goto out; - list_for_each_entry_rcu(sibling, &match->fib6_siblings, + if (fl6->mp_hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && + rt6_score_route(first->fib6_nh, first->fib6_flags, oif, + strict) >= 0) { + match = first; + goto out; + } + + list_for_each_entry_rcu(sibling, &first->fib6_siblings, fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; From 8b8e0dd357165e0258d9f9cdab5366720ed2f619 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 Apr 2025 14:42:24 +0300 Subject: [PATCH 2/2] ipv6: Do not consider link down nexthops in path selection Nexthops whose link is down are not supposed to be considered during path selection when the "ignore_routes_with_linkdown" sysctl is set. This is done by assigning them a negative region boundary. However, when comparing the computed hash (unsigned) with the region boundary (signed), the negative region boundary is treated as unsigned, resulting in incorrect nexthop selection. Fix by treating the computed hash as signed. Note that the computed hash is always in range of [0, 2^31 - 1]. Fixes: 3d709f69a3e7 ("ipv6: Use hash-threshold instead of modulo-N") Signed-off-by: Ido Schimmel Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250402114224.293392-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 864f0002034b..ab12b816ab94 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -442,6 +442,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, { struct fib6_info *first, *match = res->f6i; struct fib6_info *sibling; + int hash; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -468,7 +469,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (!first) goto out; - if (fl6->mp_hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && + hash = fl6->mp_hash; + if (hash <= atomic_read(&first->fib6_nh->fib_nh_upper_bound) && rt6_score_route(first->fib6_nh, first->fib6_flags, oif, strict) >= 0) { match = first; @@ -481,7 +483,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, int nh_upper_bound; nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); - if (fl6->mp_hash > nh_upper_bound) + if (hash > nh_upper_bound) continue; if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) break;