From 7efc0b6b666d757e07417f59397e7f5f340e74e0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:12 -0800 Subject: [PATCH 01/10] net/ipv4: Pass net to fib_multipath_hash instead of fib_info fib_multipath_hash only needs net struct to check a sysctl. Make it clear by passing net instead of fib_info. In the end this allows alignment between the ipv4 and ipv6 versions. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_semantics.c | 2 +- net/ipv4/route.c | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 8812582a94d50..7c7522e8585b6 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -395,7 +395,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); #ifdef CONFIG_IP_ROUTE_MULTIPATH -int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, +int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif void fib_select_multipath(struct fib_result *res, int hash); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 181b0d8d589c7..e7c602c600ace 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result *res, #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi->fib_nhs > 1) { - int h = fib_multipath_hash(res->fi, fl4, skb, NULL); + int h = fib_multipath_hash(net, fl4, skb, NULL); fib_select_multipath(res, h); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3bb686dac2730..1689c569bbc32 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1782,10 +1782,9 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, } /* if skb is set it will be used and fl4 can be NULL */ -int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, +int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) { - struct net *net = fi->fib_net; struct flow_keys hash_keys; u32 mhash; @@ -1852,7 +1851,7 @@ static int ip_mkroute_input(struct sk_buff *skb, { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && res->fi->fib_nhs > 1) { - int h = fib_multipath_hash(res->fi, NULL, skb, hkeys); + int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); fib_select_multipath(res, h); } From 6f74b6c259158d89ad258cda8e86240e01052884 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:13 -0800 Subject: [PATCH 02/10] net: Align ip_multipath_l3_keys and ip6_multipath_l3_keys Symmetry is good and allows easy comparison that ipv4 and ipv6 are doing the same thing. To that end, change ip_multipath_l3_keys to set addresses at the end after the icmp compares, and move the initialization of ipv6 flow keys to rt6_multipath_hash. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/route.c | 20 +++++++++++--------- net/ipv6/route.c | 4 ++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1689c569bbc32..df57bc68e8e00 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1748,37 +1748,39 @@ static void ip_multipath_l3_keys(const struct sk_buff *skb, struct flow_keys *hash_keys) { const struct iphdr *outer_iph = ip_hdr(skb); + const struct iphdr *key_iph = outer_iph; const struct iphdr *inner_iph; const struct icmphdr *icmph; struct iphdr _inner_iph; struct icmphdr _icmph; - hash_keys->addrs.v4addrs.src = outer_iph->saddr; - hash_keys->addrs.v4addrs.dst = outer_iph->daddr; if (likely(outer_iph->protocol != IPPROTO_ICMP)) - return; + goto out; if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) - return; + goto out; icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), &_icmph); if (!icmph) - return; + goto out; if (icmph->type != ICMP_DEST_UNREACH && icmph->type != ICMP_REDIRECT && icmph->type != ICMP_TIME_EXCEEDED && icmph->type != ICMP_PARAMETERPROB) - return; + goto out; inner_iph = skb_header_pointer(skb, outer_iph->ihl * 4 + sizeof(_icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) - return; - hash_keys->addrs.v4addrs.src = inner_iph->saddr; - hash_keys->addrs.v4addrs.dst = inner_iph->daddr; + goto out; + + key_iph = inner_iph; +out: + hash_keys->addrs.v4addrs.src = key_iph->saddr; + hash_keys->addrs.v4addrs.dst = key_iph->daddr; } /* if skb is set it will be used and fl4 can be NULL */ diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e2bb40824c854..190d9690dfe0c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1815,8 +1815,6 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, key_iph = inner_iph; _flkeys = NULL; out: - memset(keys, 0, sizeof(*keys)); - keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (_flkeys) { keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src; keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst; @@ -1837,6 +1835,8 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys hash_keys; if (skb) { + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; ip6_multipath_l3_keys(skb, &hash_keys, flkeys); return flow_hash_from_keys(&hash_keys) >> 1; } From ec7127a5d53d891b77a11433026dca72c57eaf88 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:14 -0800 Subject: [PATCH 03/10] net/ipv4: Simplify fib_multipath_hash with optional flow keys As of commit e37b1e978bec5 ("ipv6: route: dissect flow in input path if fib rules need it") fib_multipath_hash takes an optional flow keys. If non-NULL it means the skb has already been dissected. If not set, then fib_multipath_hash needs to call skb_flow_dissect_flow_keys. Simplify the logic by setting flkeys to the local stack variable keys. Simplifies fib_multipath_hash by only have 1 set of instructions setting hash_keys. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/route.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df57bc68e8e00..6a7b3cba39720 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1810,24 +1810,20 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, /* short-circuit if we already have L4 hash present */ if (skb->l4_hash) return skb_get_hash_raw(skb) >> 1; + memset(&hash_keys, 0, sizeof(hash_keys)); - if (flkeys) { - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; - hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; - hash_keys.ports.src = flkeys->ports.src; - hash_keys.ports.dst = flkeys->ports.dst; - hash_keys.basic.ip_proto = flkeys->basic.ip_proto; - } else { + if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, flag); - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; - hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; - hash_keys.ports.src = keys.ports.src; - hash_keys.ports.dst = keys.ports.dst; - hash_keys.basic.ip_proto = keys.basic.ip_proto; + flkeys = &keys; } + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; + hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; + hash_keys.ports.src = flkeys->ports.src; + hash_keys.ports.dst = flkeys->ports.dst; + hash_keys.basic.ip_proto = flkeys->basic.ip_proto; } else { memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; From 9a2a537acc75dfd19c4358bc9cb6042bdc60698c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:15 -0800 Subject: [PATCH 04/10] net/ipv6: Make rt6_multipath_hash similar to fib_multipath_hash Make rt6_multipath_hash more of a direct parallel to fib_multipath_hash and reduce stack and overhead in the process: get_hash_from_flowi6 is just a wrapper around __get_hash_from_flowi6 with another stack allocation for flow_keys. Move setting the addresses, protocol and label into rt6_multipath_hash and allow it to make the call to flow_hash_from_keys. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv6/route.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 190d9690dfe0c..5c89af2c54f47 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1833,15 +1833,21 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *flkeys) { struct flow_keys hash_keys; + u32 mhash; + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (skb) { - memset(&hash_keys, 0, sizeof(hash_keys)); - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; ip6_multipath_l3_keys(skb, &hash_keys, flkeys); - return flow_hash_from_keys(&hash_keys) >> 1; + } else { + hash_keys.addrs.v6addrs.src = fl6->saddr; + hash_keys.addrs.v6addrs.dst = fl6->daddr; + hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; + hash_keys.basic.ip_proto = fl6->flowi6_proto; } + mhash = flow_hash_from_keys(&hash_keys); - return get_hash_from_flowi6(fl6) >> 1; + return mhash >> 1; } void ip6_route_input(struct sk_buff *skb) From 3192dac64c73d8c0eb4274a3da23d829fb5177af Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:16 -0800 Subject: [PATCH 05/10] net: Rename NETEVENT_MULTIPATH_HASH_UPDATE Rename NETEVENT_MULTIPATH_HASH_UPDATE to NETEVENT_IPV4_MPATH_HASH_UPDATE to denote it relates to a change in the IPv4 hash policy. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +- include/net/netevent.h | 2 +- net/ipv4/sysctl_net_ipv4.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 69f16c605b9d8..93d48c1b2bf83 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2430,7 +2430,7 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, mlxsw_core_schedule_work(&net_work->work); mlxsw_sp_port_dev_put(mlxsw_sp_port); break; - case NETEVENT_MULTIPATH_HASH_UPDATE: + case NETEVENT_IPV4_MPATH_HASH_UPDATE: net = ptr; if (!net_eq(net, &init_net)) diff --git a/include/net/netevent.h b/include/net/netevent.h index 40e7bab684905..baee605a94ab3 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -26,7 +26,7 @@ enum netevent_notif_type { NETEVENT_NEIGH_UPDATE = 1, /* arg is struct neighbour ptr */ NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ - NETEVENT_MULTIPATH_HASH_UPDATE, /* arg is struct net ptr */ + NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 89683d868b371..011de9a20ec63 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -400,7 +400,7 @@ static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) - call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net); + call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); return ret; } From b75cc8f90f07342467b3bd51dbc0054f185032c9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:17 -0800 Subject: [PATCH 06/10] net/ipv6: Pass skb to route lookup IPv6 does path selection for multipath routes deep in the lookup functions. The next patch adds L4 hash option and needs the skb for the forward path. To get the skb to the relevant FIB lookup functions it needs to go through the fib rules layer, so add a lookup_data argument to the fib_lookup_arg struct. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- drivers/infiniband/core/cma.c | 2 +- drivers/net/ipvlan/ipvlan_core.c | 3 +- drivers/net/vrf.c | 7 +-- include/net/fib_rules.h | 1 + include/net/ip6_fib.h | 4 +- include/net/ip6_route.h | 11 +++-- net/ipv6/anycast.c | 2 +- net/ipv6/fib6_rules.c | 8 ++-- net/ipv6/icmp.c | 3 +- net/ipv6/ip6_fib.c | 3 +- net/ipv6/ip6_gre.c | 2 +- net/ipv6/ip6_tunnel.c | 4 +- net/ipv6/ip6_vti.c | 2 +- net/ipv6/mcast.c | 4 +- net/ipv6/netfilter/ip6t_rpfilter.c | 2 +- net/ipv6/netfilter/nft_fib_ipv6.c | 3 +- net/ipv6/route.c | 72 ++++++++++++++++++------------ net/ipv6/seg6_local.c | 4 +- 18 files changed, 83 insertions(+), 54 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 3ae32d1ddd27b..915bbd867b612 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1334,7 +1334,7 @@ static bool validate_ipv6_net_dev(struct net_device *net_dev, IPV6_ADDR_LINKLOCAL; struct rt6_info *rt = rt6_lookup(dev_net(net_dev), &dst_addr->sin6_addr, &src_addr->sin6_addr, net_dev->ifindex, - strict); + NULL, strict); bool ret; if (!rt) diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 17daebd19e656..1a8132eb2a3ec 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -817,7 +817,8 @@ struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, }; skb_dst_drop(skb); - dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, + skb, flags); skb_dst_set(skb, dst); break; } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index e459e601c57f6..c6be49d3a9ebd 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -941,6 +941,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, const struct net_device *dev, struct flowi6 *fl6, int ifindex, + const struct sk_buff *skb, int flags) { struct net_vrf *vrf = netdev_priv(dev); @@ -959,7 +960,7 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, if (!table) return NULL; - return ip6_pol_route(net, table, ifindex, fl6, flags); + return ip6_pol_route(net, table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, @@ -977,7 +978,7 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; - rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, + rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb, RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); if (unlikely(!rt6)) return; @@ -1110,7 +1111,7 @@ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags); if (rt) dst = &rt->dst; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 1c9e17c11953c..e5cfcfc7dd93c 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -47,6 +47,7 @@ struct fib_rule { struct fib_lookup_arg { void *lookup_ptr; + const void *lookup_data; void *result; struct fib_rule *rule; u32 table; diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8d906a35b534e..5e86fd9dc8573 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -350,7 +350,8 @@ struct fib6_table { typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_table *, - struct flowi6 *, int); + struct flowi6 *, + const struct sk_buff *, int); struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ @@ -364,6 +365,7 @@ struct fib6_entry_notifier_info { struct fib6_table *fib6_get_table(struct net *net, u32 id); struct fib6_table *fib6_new_table(struct net *net, u32 id); struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup); struct fib6_node *fib6_lookup(struct fib6_node *root, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index da2bde5fda8f6..9594f93179522 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -75,7 +75,8 @@ static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, - struct flowi6 *fl6, int flags); + struct flowi6 *fl6, + const struct sk_buff *skb, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); @@ -88,9 +89,10 @@ static inline struct dst_entry *ip6_route_output(struct net *net, } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, - int flags); + const struct sk_buff *skb, int flags); struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, - int ifindex, struct flowi6 *fl6, int flags); + int ifindex, struct flowi6 *fl6, + const struct sk_buff *skb, int flags); void ip6_route_init_special_entries(void); int ip6_route_init(void); @@ -126,7 +128,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, } struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, - const struct in6_addr *saddr, int oif, int flags); + const struct in6_addr *saddr, int oif, + const struct sk_buff *skb, int flags); u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *hkeys); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index d7d0abc7fd0ed..c61718dba2e60 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -78,7 +78,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(net, addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 04e5f523e50ff..00ef9467f3c04 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net) } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup) { if (net->ipv6.fib6_has_custom_rules) { struct fib_lookup_arg arg = { .lookup_ptr = lookup, + .lookup_data = skb, .flags = FIB_LOOKUP_NOREF, }; @@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, } else { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags); if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put(rt); - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error != -EAGAIN) return &rt->dst; ip6_rt_put(rt); @@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto out; } - rt = lookup(net, table, flp6, flags); + rt = lookup(net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { struct fib6_rule *r = (struct fib6_rule *)rule; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b0778d323b6ec..a5d929223820e 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, skb_pull(skb2, nhs); skb_reset_network_header(skb2); - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, + skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index cab95cf3b39fc..2f995e9e3050c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) } struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + const struct sk_buff *skb, int flags, pol_lookup_t lookup) { struct rt6_info *rt; - rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags); if (rt->dst.error == -EAGAIN) { ip6_rt_put(rt); rt = net->ipv6.ip6_null_entry; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4f150a394387f..83c7766c8c756 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1053,7 +1053,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (!rt) return; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 869e2e6750f7a..1124f310df5ae 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Try to guess incoming interface */ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, - NULL, 0, 0); + NULL, 0, skb2, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; @@ -1444,7 +1444,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (!rt) return; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c617ea17faa8a..a482b854eeea9 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -645,7 +645,7 @@ static void vti6_link_config(struct ip6_tnl *t) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, - p->link, strict); + p->link, NULL, strict); if (rt) tdev = rt->dst.dev; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d9bb933dd5c42..d1a0cefac2730 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(net, addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); @@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, struct inet6_dev *idev = NULL; if (ifindex == 0) { - struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0); + struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 94deb69bbbdaa..910a27318f581 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, lookup_flags |= RT6_LOOKUP_F_IFACE; } - rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); + rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); if (rt->dst.error) goto out; diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index cc5174c7254c5..3230b3d7b11b3 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -181,7 +181,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, *dest = 0; again: - rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags); + rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, + lookup_flags); if (rt->dst.error) goto put_rt_err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5c89af2c54f47..d2b8368663cbe 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -452,6 +452,7 @@ static bool rt6_check_expired(const struct rt6_info *rt) static struct rt6_info *rt6_multipath_select(struct rt6_info *match, struct flowi6 *fl6, int oif, + const struct sk_buff *skb, int strict) { struct rt6_info *sibling, *next_sibling; @@ -460,7 +461,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash) - fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL); + fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL); if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) return match; @@ -914,7 +915,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { struct rt6_info *rt, *rt_cache; struct fib6_node *fn; @@ -929,8 +932,8 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6, - fl6->flowi6_oif, flags); + rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, + skb, flags); } if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); @@ -954,14 +957,15 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, - int flags) + const struct sk_buff *skb, int flags) { - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); + return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup); } EXPORT_SYMBOL_GPL(ip6_route_lookup); struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, - const struct in6_addr *saddr, int oif, int strict) + const struct in6_addr *saddr, int oif, + const struct sk_buff *skb, int strict) { struct flowi6 fl6 = { .flowi6_oif = oif, @@ -975,7 +979,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, flags |= RT6_LOOKUP_F_HAS_SADDR; } - dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); + dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); if (dst->error == 0) return (struct rt6_info *) dst; @@ -1647,7 +1651,8 @@ void rt6_age_exceptions(struct rt6_info *rt, } struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int flags) + int oif, struct flowi6 *fl6, + const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; struct rt6_info *rt, *rt_cache; @@ -1669,7 +1674,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, redo_rt6_select: rt = rt6_select(net, fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, strict); + rt = rt6_multipath_select(rt, fl6, oif, skb, strict); if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) @@ -1768,20 +1773,25 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, } EXPORT_SYMBOL_GPL(ip6_pol_route); -static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) +static struct rt6_info *ip6_pol_route_input(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { - return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); + return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags); } struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, - struct flowi6 *fl6, int flags) + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); + return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input); } EXPORT_SYMBOL_GPL(ip6_route_input_lookup); @@ -1876,13 +1886,17 @@ void ip6_route_input(struct sk_buff *skb) if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); skb_dst_drop(skb); - skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); + skb_dst_set(skb, + ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); } -static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, - struct flowi6 *fl6, int flags) +static struct rt6_info *ip6_pol_route_output(struct net *net, + struct fib6_table *table, + struct flowi6 *fl6, + const struct sk_buff *skb, + int flags) { - return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); + return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); } struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, @@ -1910,7 +1924,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, else if (sk) flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); - return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); + return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); } EXPORT_SYMBOL_GPL(ip6_route_output_flags); @@ -2159,6 +2173,7 @@ struct ip6rd_flowi { static struct rt6_info *__ip6_route_redirect(struct net *net, struct fib6_table *table, struct flowi6 *fl6, + const struct sk_buff *skb, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; @@ -2232,8 +2247,9 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, }; static struct dst_entry *ip6_route_redirect(struct net *net, - const struct flowi6 *fl6, - const struct in6_addr *gateway) + const struct flowi6 *fl6, + const struct sk_buff *skb, + const struct in6_addr *gateway) { int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip6rd_flowi rdfl; @@ -2241,7 +2257,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net, rdfl.fl6 = *fl6; rdfl.gateway = *gateway; - return fib6_rule_lookup(net, &rdfl.fl6, + return fib6_rule_lookup(net, &rdfl.fl6, skb, flags, __ip6_route_redirect); } @@ -2261,7 +2277,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_uid = uid; - dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); + dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } @@ -2283,7 +2299,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, fl6.saddr = iph->daddr; fl6.flowi6_uid = sock_net_uid(net, NULL); - dst = ip6_route_redirect(net, &fl6, &iph->saddr); + dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); rt6_do_redirect(dst, NULL, skb); dst_release(dst); } @@ -2485,7 +2501,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, flags |= RT6_LOOKUP_F_HAS_SADDR; flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; - rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); + rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags); /* if table lookup failed, fall back to full lookup */ if (rt == net->ipv6.ip6_null_entry) { @@ -2548,7 +2564,7 @@ static int ip6_route_check_nh(struct net *net, } if (!grt) - grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1); if (!grt) goto out; @@ -4613,7 +4629,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!ipv6_addr_any(&fl6.saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; - dst = ip6_route_input_lookup(net, dev, &fl6, flags); + dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags); rcu_read_unlock(); } else { diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index ba3767ef5e93a..45722327375af 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; if (!tbl_id) { - dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); + dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); } else { struct fib6_table *table; @@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, if (!table) goto out; - rt = ip6_pol_route(net, table, 0, &fl6, flags); + rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); dst = &rt->dst; } From b4bac172e90ce4a93df8adf44eb70d91b9d611eb Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:18 -0800 Subject: [PATCH 07/10] net/ipv6: Add support for path selection using hash of 5-tuple Some operators prefer IPv6 path selection to use a standard 5-tuple hash rather than just an L3 hash with the flow the label. To that end add support to IPv6 for multipath hash policy similar to bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice"). The default is still L3 which covers source and destination addresses along with flow label and IPv6 protocol. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++ include/net/ip6_route.h | 4 +- include/net/netevent.h | 1 + include/net/netns/ipv6.h | 1 + net/ipv6/icmp.c | 2 +- net/ipv6/route.c | 68 ++++++++++++++++++++------ net/ipv6/sysctl_net_ipv6.c | 27 ++++++++++ 7 files changed, 91 insertions(+), 19 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a553d4e4a0fb4..783675a730e53 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1363,6 +1363,13 @@ flowlabel_reflect - BOOLEAN FALSE: disabled Default: FALSE +fib_multipath_hash_policy - INTEGER + Controls which hash policy to use for multipath routes. + Default: 0 (Layer 3) + Possible values: + 0 - Layer 3 (source and destination addresses plus flow label) + 1 - Layer 4 (standard 5-tuple) + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9594f93179522..ce2abc0ff1020 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -130,8 +130,8 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, const struct sk_buff *skb, int flags); -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, - struct flow_keys *hkeys); +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, + const struct sk_buff *skb, struct flow_keys *hkeys); struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); diff --git a/include/net/netevent.h b/include/net/netevent.h index baee605a94ab3..d9918261701c9 100644 --- a/include/net/netevent.h +++ b/include/net/netevent.h @@ -27,6 +27,7 @@ enum netevent_notif_type { NETEVENT_REDIRECT, /* arg is struct netevent_redirect ptr */ NETEVENT_DELAY_PROBE_TIME_UPDATE, /* arg is struct neigh_parms ptr */ NETEVENT_IPV4_MPATH_HASH_UPDATE, /* arg is struct net ptr */ + NETEVENT_IPV6_MPATH_HASH_UPDATE, /* arg is struct net ptr */ }; int register_netevent_notifier(struct notifier_block *nb); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index e286fda09fcfe..5b51110435fcc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,6 +28,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_gc_elasticity; int ip6_rt_mtu_expires; int ip6_rt_min_advmss; + int multipath_hash_policy; int flowlabel_consistency; int auto_flowlabels; int icmpv6_time; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a5d929223820e..6f84668be6ea2 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); - fl6.mp_hash = rt6_multipath_hash(&fl6, skb, NULL); + fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d2b8368663cbe..f0ae58424c45d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -450,7 +450,8 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static struct rt6_info *rt6_multipath_select(struct rt6_info *match, +static struct rt6_info *rt6_multipath_select(const struct net *net, + struct rt6_info *match, struct flowi6 *fl6, int oif, const struct sk_buff *skb, int strict) @@ -461,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, * case it will always be non-zero. Otherwise now is the time to do it. */ if (!fl6->mp_hash) - fl6->mp_hash = rt6_multipath_hash(fl6, skb, NULL); + fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) return match; @@ -932,7 +933,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, + rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, skb, flags); } if (rt == net->ipv6.ip6_null_entry) { @@ -1674,7 +1675,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, redo_rt6_select: rt = rt6_select(net, fn, oif, strict); if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(rt, fl6, oif, skb, strict); + rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) @@ -1839,21 +1840,56 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, } /* if skb is set it will be used and fl6 can be NULL */ -u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, - struct flow_keys *flkeys) +u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, + const struct sk_buff *skb, struct flow_keys *flkeys) { struct flow_keys hash_keys; u32 mhash; - memset(&hash_keys, 0, sizeof(hash_keys)); - hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - if (skb) { - ip6_multipath_l3_keys(skb, &hash_keys, flkeys); - } else { - hash_keys.addrs.v6addrs.src = fl6->saddr; - hash_keys.addrs.v6addrs.dst = fl6->daddr; - hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; - hash_keys.basic.ip_proto = fl6->flowi6_proto; + switch (net->ipv6.sysctl.multipath_hash_policy) { + case 0: + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (skb) { + ip6_multipath_l3_keys(skb, &hash_keys, flkeys); + } else { + hash_keys.addrs.v6addrs.src = fl6->saddr; + hash_keys.addrs.v6addrs.dst = fl6->daddr; + hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; + hash_keys.basic.ip_proto = fl6->flowi6_proto; + } + break; + case 1: + if (skb) { + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; + struct flow_keys keys; + + /* short-circuit if we already have L4 hash present */ + if (skb->l4_hash) + return skb_get_hash_raw(skb) >> 1; + + memset(&hash_keys, 0, sizeof(hash_keys)); + + if (!flkeys) { + skb_flow_dissect_flow_keys(skb, &keys, flag); + flkeys = &keys; + } + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; + hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; + hash_keys.ports.src = flkeys->ports.src; + hash_keys.ports.dst = flkeys->ports.dst; + hash_keys.basic.ip_proto = flkeys->basic.ip_proto; + } else { + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + hash_keys.addrs.v6addrs.src = fl6->saddr; + hash_keys.addrs.v6addrs.dst = fl6->daddr; + hash_keys.ports.src = fl6->fl6_sport; + hash_keys.ports.dst = fl6->fl6_dport; + hash_keys.basic.ip_proto = fl6->flowi6_proto; + } + break; } mhash = flow_hash_from_keys(&hash_keys); @@ -1884,7 +1920,7 @@ void ip6_route_input(struct sk_buff *skb) flkeys = &_flkeys; if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) - fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); + fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); skb_dst_drop(skb); skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 262f791f1b9b9..966c42af92f47 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,14 +16,31 @@ #include #include #include +#include #ifdef CONFIG_NETLABEL #include #endif +static int zero; static int one = 1; static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; +static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net; + int ret; + + net = container_of(table->data, struct net, + ipv6.sysctl.multipath_hash_policy); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net); + + return ret; +} static struct ctl_table ipv6_table_template[] = { { @@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fib_multipath_hash_policy", + .data = &init_net.ipv6.sysctl.multipath_hash_policy, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_rt6_multipath_hash_policy, + .extra1 = &zero, + .extra2 = &one, + }, { } }; @@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; + ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy, ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) From 5e18b9c550397e4796bdca0f4cb8880fcc127dfe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:19 -0800 Subject: [PATCH 08/10] mlxsw: spectrum_router: Add support for ipv6 hash policy update Similar to 28678f07f127d ("mlxsw: spectrum_router: Update multipath hash parameters upon netevents") for IPv4, make sure the kernel and asic are using the same hash algorithm for path selection. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 93d48c1b2bf83..a8a578610a7bf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2431,6 +2431,7 @@ static int mlxsw_sp_router_netevent_event(struct notifier_block *nb, mlxsw_sp_port_dev_put(mlxsw_sp_port); break; case NETEVENT_IPV4_MPATH_HASH_UPDATE: + case NETEVENT_IPV6_MPATH_HASH_UPDATE: net = ptr; if (!net_eq(net, &init_net)) @@ -7030,13 +7031,25 @@ static void mlxsw_sp_mp4_hash_init(char *recr2_pl) static void mlxsw_sp_mp6_hash_init(char *recr2_pl) { + bool only_l3 = !init_net.ipv6.sysctl.multipath_hash_policy; + mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP); mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP); mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl); mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl); - mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_FLOW_LABEL); mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER); + if (only_l3) { + mlxsw_sp_mp_hash_field_set(recr2_pl, + MLXSW_REG_RECR2_IPV6_FLOW_LABEL); + } else { + mlxsw_sp_mp_hash_header_set(recr2_pl, + MLXSW_REG_RECR2_TCP_UDP_EN_IPV6); + mlxsw_sp_mp_hash_field_set(recr2_pl, + MLXSW_REG_RECR2_TCP_UDP_SPORT); + mlxsw_sp_mp_hash_field_set(recr2_pl, + MLXSW_REG_RECR2_TCP_UDP_DPORT); + } } static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) From de7a0f871fabe74fff7481caf7d3efe03b58fe58 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:20 -0800 Subject: [PATCH 09/10] net: Remove unused get_hash_from_flow functions __get_hash_from_flowi6 is still used for flowlabels, but the IPv4 variant and the wrappers to both are not used. Remove them. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/flow.h | 16 ---------------- net/core/flow_dissector.c | 16 ---------------- 2 files changed, 32 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index 64e7ee9cb980c..8ce21793094e3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -222,20 +222,4 @@ static inline unsigned int flow_key_size(u16 family) __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys); -static inline __u32 get_hash_from_flowi6(const struct flowi6 *fl6) -{ - struct flow_keys keys; - - return __get_hash_from_flowi6(fl6, &keys); -} - -__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys); - -static inline __u32 get_hash_from_flowi4(const struct flowi4 *fl4) -{ - struct flow_keys keys; - - return __get_hash_from_flowi4(fl4, &keys); -} - #endif diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 559db9ea8d86f..d29f09bc5ff90 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1341,22 +1341,6 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys) } EXPORT_SYMBOL(__get_hash_from_flowi6); -__u32 __get_hash_from_flowi4(const struct flowi4 *fl4, struct flow_keys *keys) -{ - memset(keys, 0, sizeof(*keys)); - - keys->addrs.v4addrs.src = fl4->saddr; - keys->addrs.v4addrs.dst = fl4->daddr; - keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - keys->ports.src = fl4->fl4_sport; - keys->ports.dst = fl4->fl4_dport; - keys->keyid.keyid = fl4->fl4_gre_key; - keys->basic.ip_proto = fl4->flowi4_proto; - - return flow_hash_from_keys(keys); -} -EXPORT_SYMBOL(__get_hash_from_flowi4); - static const struct flow_dissector_key flow_keys_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, From 91a5c1ecba9f23f247b3772e6d54aabfebc0e300 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 2 Mar 2018 08:32:21 -0800 Subject: [PATCH 10/10] selftests: forwarding: Add multipath test for L4 hashing Add IPv6 multipath test using L4 hashing. Created with inputs from Ido Schimmel. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/forwarding/router_multipath.sh | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index 55595305a6048..3bc351008db63 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -235,6 +235,45 @@ multipath4_test() sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy } +multipath6_l4_test() +{ + local desc="$1" + local weight_rp12=$2 + local weight_rp13=$3 + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 + local packets_rp12 packets_rp13 + local hash_policy + + # Transmit multiple flows from h1 to h2 and make sure they are + # distributed between both multipath links (rp12 and rp13) + # according to the configured weights. + hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy) + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \ + nexthop via fe80:3::23 dev $rp13 weight $weight_rp13 + + t0_rp12=$(link_stats_tx_packets_get $rp12) + t0_rp13=$(link_stats_tx_packets_get $rp13) + + $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ + -d 1msec -t udp "sp=1024,dp=0-32768" + + t1_rp12=$(link_stats_tx_packets_get $rp12) + t1_rp13=$(link_stats_tx_packets_get $rp13) + + let "packets_rp12 = $t1_rp12 - $t0_rp12" + let "packets_rp13 = $t1_rp13 - $t0_rp13" + multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13 + + ip route replace 2001:db8:2::/64 vrf vrf-r1 \ + nexthop via fe80:2::22 dev $rp12 \ + nexthop via fe80:3::23 dev $rp13 + + sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy +} + multipath6_test() { local desc="$1" @@ -278,6 +317,11 @@ multipath_test() multipath6_test "ECMP" 1 1 multipath6_test "Weighted MP 2:1" 2 1 multipath6_test "Weighted MP 11:45" 11 45 + + log_info "Running IPv6 L4 hash multipath tests" + multipath6_l4_test "ECMP" 1 1 + multipath6_l4_test "Weighted MP 2:1" 2 1 + multipath6_l4_test "Weighted MP 11:45" 11 45 } setup_prepare()