diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1904c0323d390..b85b15851841b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -442,7 +442,7 @@ struct mlxsw_sp_fib6_entry { struct mlxsw_sp_rt6 { struct list_head list; - struct rt6_info *rt; + struct fib6_info *rt; }; struct mlxsw_sp_lpm_tree { @@ -2770,9 +2770,9 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, struct in6_addr *gw; int ifindex, weight; - ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex; - weight = mlxsw_sp_rt6->rt->rt6i_nh_weight; - gw = &mlxsw_sp_rt6->rt->rt6i_gateway; + ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex; + weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight; + gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw; if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, weight)) return false; @@ -2838,7 +2838,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->dst.dev; + dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev; val ^= dev->ifindex; } @@ -3834,11 +3834,11 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, for (i = 0; i < nh_grp->count; i++) { struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->dst.dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->rt6i_gateway)) + &rt->fib6_nh.nh_gw)) return nh; continue; } @@ -3895,7 +3895,7 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; return; } @@ -3905,9 +3905,9 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); if (nh && nh->offloaded) - mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; else - mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -3920,9 +3920,9 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, common); list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4699,7 +4699,7 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) +static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) { /* Packets with link-local destination IP arriving to the router * are trapped to the CPU, so no need to program specific routes @@ -4721,7 +4721,7 @@ static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) return false; } -static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -4734,18 +4734,18 @@ static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) * memory. */ mlxsw_sp_rt6->rt = rt; - rt6_hold(rt); + fib6_info_hold(rt); return mlxsw_sp_rt6; } #if IS_ENABLED(CONFIG_IPV6) -static void mlxsw_sp_rt6_release(struct rt6_info *rt) +static void mlxsw_sp_rt6_release(struct fib6_info *rt) { - rt6_release(rt); + fib6_info_release(rt); } #else -static void mlxsw_sp_rt6_release(struct rt6_info *rt) +static void mlxsw_sp_rt6_release(struct fib6_info *rt) { } #endif @@ -4756,13 +4756,13 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) kfree(mlxsw_sp_rt6); } -static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt) +static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; } -static struct rt6_info * +static struct fib6_info * mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) { return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, @@ -4771,7 +4771,7 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt, bool replace) + const struct fib6_info *nrt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; @@ -4779,7 +4779,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, return NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same * virtual router. @@ -4802,7 +4802,7 @@ mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, static struct mlxsw_sp_rt6 * mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, - const struct rt6_info *rt) + const struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -4815,21 +4815,21 @@ mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, } static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt, + const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->dst.dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret); + return rt->fib6_nh.nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, - const struct rt6_info *rt) + const struct fib6_info *rt) { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->dst.dev; + struct net_device *dev = rt->fib6_nh.nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -4870,13 +4870,13 @@ static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, - const struct rt6_info *rt) + const struct fib6_info *rt) { - struct net_device *dev = rt->dst.dev; + struct net_device *dev = rt->fib6_nh.nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->rt6i_nh_weight; - memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh.nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -4897,7 +4897,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, } static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt) + const struct fib6_info *rt) { return rt->rt6i_flags & RTF_GATEWAY || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); @@ -4928,7 +4928,7 @@ mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); nh_grp->count = fib6_entry->nrt6; for (i = 0; i < nh_grp->count; i++) { - struct rt6_info *rt = mlxsw_sp_rt6->rt; + struct fib6_info *rt = mlxsw_sp_rt6->rt; nh = &nh_grp->nexthops[i]; err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); @@ -5040,7 +5040,7 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; int err; @@ -5068,7 +5068,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; @@ -5084,7 +5084,7 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, - const struct rt6_info *rt) + const struct fib6_info *rt) { /* Packets hitting RTF_REJECT routes need to be discarded by the * stack. We can rely on their destination device not having a @@ -5118,7 +5118,7 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; @@ -5168,12 +5168,12 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct rt6_info *nrt, bool replace) + const struct fib6_info *nrt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) continue; @@ -5198,7 +5198,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, bool replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; - struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); + struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); @@ -5213,7 +5213,7 @@ mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, struct mlxsw_sp_fib6_entry *last; list_for_each_entry(last, &fib_node->entry_list, common.list) { - struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last); + struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last); if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id) break; @@ -5268,7 +5268,7 @@ mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, - const struct rt6_info *rt) + const struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5287,7 +5287,7 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, return NULL; list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { - struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id && rt->rt6i_metric == iter_rt->rt6i_metric && @@ -5316,7 +5316,7 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct rt6_info *rt, bool replace) + struct fib6_info *rt, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5373,7 +5373,7 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, } static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct rt6_info *rt) + struct fib6_info *rt) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5836,7 +5836,7 @@ static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, fen6_info = container_of(info, struct fib6_entry_notifier_info, info); fib_work->fen6_info = *fen6_info; - rt6_hold(fib_work->fen6_info.rt); + fib6_info_hold(fib_work->fen6_info.rt); break; } } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 0a2b180d138a4..90b5f3900c22e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -48,6 +48,9 @@ static unsigned int vrf_net_id; struct net_vrf { struct rtable __rcu *rth; struct rt6_info __rcu *rt6; +#if IS_ENABLED(CONFIG_IPV6) + struct fib6_table *fib6_table; +#endif u32 tb_id; }; @@ -496,7 +499,6 @@ static int vrf_rt6_create(struct net_device *dev) int flags = DST_HOST | DST_NOPOLICY | DST_NOXFRM; struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); - struct fib6_table *rt6i_table; struct rt6_info *rt6; int rc = -ENOMEM; @@ -504,8 +506,8 @@ static int vrf_rt6_create(struct net_device *dev) if (!ipv6_mod_enabled()) return 0; - rt6i_table = fib6_new_table(net, vrf->tb_id); - if (!rt6i_table) + vrf->fib6_table = fib6_new_table(net, vrf->tb_id); + if (!vrf->fib6_table) goto out; /* create a dst for routing packets out a VRF device */ @@ -513,7 +515,6 @@ static int vrf_rt6_create(struct net_device *dev) if (!rt6) goto out; - rt6->rt6i_table = rt6i_table; rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); @@ -946,22 +947,8 @@ static struct rt6_info *vrf_ip6_route_lookup(struct net *net, int flags) { struct net_vrf *vrf = netdev_priv(dev); - struct fib6_table *table = NULL; - struct rt6_info *rt6; - - rcu_read_lock(); - - /* fib6_table does not have a refcnt and can not be freed */ - rt6 = rcu_dereference(vrf->rt6); - if (likely(rt6)) - table = rt6->rt6i_table; - - rcu_read_unlock(); - - if (!table) - return NULL; - return ip6_pol_route(net, table, ifindex, fl6, skb, flags); + return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d4088d1a688d4..d6089b2e64fed 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -64,7 +64,7 @@ struct inet6_ifaddr { struct delayed_work dad_work; struct inet6_dev *idev; - struct rt6_info *rt; + struct fib6_info *rt; struct hlist_node addr_lst; struct list_head if_list; @@ -144,7 +144,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; struct inet6_dev *aca_idev; - struct rt6_info *aca_rt; + struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; refcount_t aca_refcnt; diff --git a/include/net/ip.h b/include/net/ip.h index ecffd843e7b89..dc4a2d6e58a51 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -396,6 +396,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } +int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, + u32 *metrics); + u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5e86fd9dc8573..a36116b92100c 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -38,6 +38,7 @@ #endif struct rt6_info; +struct fib6_info; struct fib6_config { u32 fc_table; @@ -74,12 +75,12 @@ struct fib6_node { #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif - struct rt6_info __rcu *leaf; + struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; - struct rt6_info __rcu *rr_ptr; + struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; @@ -94,11 +95,6 @@ struct fib6_gc_args { #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif -struct mx6_config { - const u32 *mx; - DECLARE_BITMAP(mx_valid, RTAX_MAX); -}; - /* * routing information * @@ -127,56 +123,72 @@ struct rt6_exception { #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 -struct rt6_info { - struct dst_entry dst; - struct rt6_info __rcu *rt6_next; - struct rt6_info *from; +struct fib6_nh { + struct in6_addr nh_gw; + struct net_device *nh_dev; + struct lwtunnel_state *nh_lwtstate; - /* - * Tail elements of dst_entry (__refcnt etc.) - * and these elements (rarely used in hot path) are in - * the same cache line. - */ + unsigned int nh_flags; + atomic_t nh_upper_bound; + int nh_weight; +}; + +struct fib6_info { struct fib6_table *rt6i_table; + struct fib6_info __rcu *rt6_next; struct fib6_node __rcu *rt6i_node; - struct in6_addr rt6i_gateway; - /* Multipath routes: - * siblings is a list of rt6_info that have the the same metric/weight, + * siblings is a list of fib6_info that have the the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ struct list_head rt6i_siblings; unsigned int rt6i_nsiblings; - atomic_t rt6i_nh_upper_bound; atomic_t rt6i_ref; + struct inet6_dev *rt6i_idev; + unsigned long expires; + struct dst_metrics *fib6_metrics; +#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] - unsigned int rt6i_nh_flags; - - /* These are in a separate cache line. */ - struct rt6key rt6i_dst ____cacheline_aligned_in_smp; + struct rt6key rt6i_dst; u32 rt6i_flags; struct rt6key rt6i_src; struct rt6key rt6i_prefsrc; - struct list_head rt6i_uncached; - struct uncached_list *rt6i_uncached_list; - - struct inet6_dev *rt6i_idev; struct rt6_info * __percpu *rt6i_pcpu; struct rt6_exception_bucket __rcu *rt6i_exception_bucket; u32 rt6i_metric; - u32 rt6i_pmtu; - /* more non-fragment space at head required */ - int rt6i_nh_weight; - unsigned short rt6i_nfheader_len; u8 rt6i_protocol; + u8 fib6_type; u8 exception_bucket_flushed:1, should_flush:1, - unused:6; + dst_nocount:1, + dst_nopolicy:1, + dst_host:1, + unused:3; + + struct fib6_nh fib6_nh; +}; + +struct rt6_info { + struct dst_entry dst; + struct fib6_info *from; + + struct rt6key rt6i_dst; + struct rt6key rt6i_src; + struct in6_addr rt6i_gateway; + struct inet6_dev *rt6i_idev; + u32 rt6i_flags; + struct rt6key rt6i_prefsrc; + + struct list_head rt6i_uncached; + struct uncached_list *rt6i_uncached_list; + + /* more non-fragment space at head required */ + unsigned short rt6i_nfheader_len; }; #define for_each_fib6_node_rt_rcu(fn) \ @@ -192,6 +204,26 @@ static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) return ((struct rt6_info *)dst)->rt6i_idev; } +static inline void fib6_clean_expires(struct fib6_info *f6i) +{ + f6i->rt6i_flags &= ~RTF_EXPIRES; + f6i->expires = 0; +} + +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) +{ + f6i->expires = expires; + f6i->rt6i_flags |= RTF_EXPIRES; +} + +static inline bool fib6_check_expired(const struct fib6_info *f6i) +{ + if (f6i->rt6i_flags & RTF_EXPIRES) + return time_after(jiffies, f6i->expires); + return false; +} + static inline void rt6_clean_expires(struct rt6_info *rt) { rt->rt6i_flags &= ~RTF_EXPIRES; @@ -206,11 +238,9 @@ static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) { - struct rt6_info *rt; + if (!(rt0->rt6i_flags & RTF_EXPIRES) && rt0->from) + rt0->dst.expires = rt0->from->expires; - for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from); - if (rt && rt != rt0) - rt0->dst.expires = rt->dst.expires; dst_set_expires(&rt0->dst, timeout); rt0->rt6i_flags |= RTF_EXPIRES; } @@ -220,7 +250,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) * Return true if we can get cookie safely * Return false if not */ -static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, +static inline bool rt6_get_cookie_safe(const struct fib6_info *rt, u32 *cookie) { struct fib6_node *fn; @@ -246,9 +276,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt) if (rt->rt6i_flags & RTF_PCPU || (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) - rt = rt->from; - - rt6_get_cookie_safe(rt, &cookie); + rt6_get_cookie_safe(rt->from, &cookie); return cookie; } @@ -262,20 +290,18 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); +void fib6_info_destroy(struct fib6_info *f6i); -static inline void rt6_hold(struct rt6_info *rt) +static inline void fib6_info_hold(struct fib6_info *f6i) { - atomic_inc(&rt->rt6i_ref); + atomic_inc(&f6i->rt6i_ref); } -static inline void rt6_release(struct rt6_info *rt) +static inline void fib6_info_release(struct fib6_info *f6i) { - if (atomic_dec_and_test(&rt->rt6i_ref)) { - rt6_free_pcpu(rt); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } + if (f6i && atomic_dec_and_test(&f6i->rt6i_ref)) + fib6_info_destroy(f6i); } enum fib6_walk_state { @@ -291,7 +317,7 @@ enum fib6_walk_state { struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; - struct rt6_info *leaf; + struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; @@ -355,7 +381,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ - struct rt6_info *rt; + struct fib6_info *rt; }; /* @@ -377,15 +403,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *saddr, int src_len, bool exact_match); -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack); -int fib6_del(struct rt6_info *rt, struct nl_info *info); +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack); +int fib6_del(struct fib6_info *rt, struct nl_info *info); -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); @@ -408,8 +433,14 @@ void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); -void fib6_update_sernum(struct rt6_info *rt); -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt); +void fib6_update_sernum(struct net *net, struct fib6_info *rt); +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); + +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); +static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) +{ + return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); +} #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 08b132381984f..d5fb1e4ae7ac2 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -66,7 +66,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } -static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) +static inline bool rt6_qualify_for_ecmp(const struct fib6_info *rt) { return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == RTF_GATEWAY; @@ -100,22 +100,21 @@ void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); -int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); -int ip6_ins_rt(struct rt6_info *); -int ip6_del_rt(struct rt6_info *); +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +int ip6_ins_rt(struct net *net, struct fib6_info *rt); +int ip6_del_rt(struct net *net, struct fib6_info *rt); -void rt6_flush_exceptions(struct rt6_info *rt); -int rt6_remove_exception_rt(struct rt6_info *rt); -void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, +void rt6_flush_exceptions(struct fib6_info *rt); +void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, unsigned long now); -static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, +static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *rt, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = - rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; + struct inet6_dev *idev = rt ? rt->rt6i_idev : NULL; int err = 0; if (rt && rt->rt6i_prefsrc.plen) @@ -137,8 +136,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); void fib6_force_start_gc(struct net *net); -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, bool anycast); +struct fib6_info *addrconf_dst_alloc(struct net *net, struct inet6_dev *idev, + const struct in6_addr *addr, bool anycast, + gfp_t gfp_flags); struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags); @@ -147,9 +147,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, * support functions for ND * */ -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, struct net_device *dev); -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); void rt6_purge_dflt_routers(struct net *net); @@ -174,14 +176,14 @@ struct rt6_rtnl_dump_arg { struct net *net; }; -int rt6_dump_route(struct rt6_info *rt, void *p_arg); +int rt6_dump_route(struct fib6_info *rt, void *p_arg); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); -void rt6_multipath_rebalance(struct rt6_info *rt); +void rt6_multipath_rebalance(struct fib6_info *rt); void rt6_uncached_list_add(struct rt6_info *rt); void rt6_uncached_list_del(struct rt6_info *rt); @@ -269,12 +271,15 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } -static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->dst.dev == b->dst.dev && + return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && a->rt6i_idev == b->rt6i_idev && - ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && - !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); + ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && + !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); } +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, struct sk_buff *skb, + const void *daddr); #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index c29f09cfc9d73..97b3a54579c82 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -60,7 +60,8 @@ struct netns_ipv6 { #endif struct xt_table *ip6table_nat; #endif - struct rt6_info *ip6_null_entry; + struct fib6_info *fib6_null_entry; + struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; diff --git a/net/core/dst.c b/net/core/dst.c index 007aa0b082915..2d9b37f8944a9 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -58,6 +58,7 @@ const struct dst_metrics dst_default_metrics = { */ .refcnt = REFCOUNT_INIT(1), }; +EXPORT_SYMBOL(dst_default_metrics); void dst_init(struct dst_entry *dst, struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 45936922d7e23..80802546c2797 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -785,13 +785,15 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse), - .rta_used = dst->__use, - .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, .rta_id = id, }; + if (dst) { + ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); + ci.rta_used = dst->__use; + ci.rta_clntref = atomic_read(&dst->__refcnt); + } if (expires) { unsigned long clock; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index a07b7dd06defb..b379520f91334 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,7 +13,8 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ - inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o + inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ + metrics.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c27122f01b874..6608db23f54b6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1019,47 +1019,8 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) static int fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) { - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - - if (!cfg->fc_mx) - return 0; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (type > RTAX_MAX) - return -EINVAL; - - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; - - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - return -EINVAL; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_ADVMSS && val > 65535 - 40) - val = 65535 - 40; - if (type == RTAX_MTU && val > 65535 - 15) - val = 65535 - 15; - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - return -EINVAL; - fi->fib_metrics->metrics[type - 1] = val; - } - - if (ecn_ca) - fi->fib_metrics->metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; - - return 0; + return ip_metrics_convert(fi->fib_net, cfg->fc_mx, cfg->fc_mx_len, + fi->fib_metrics->metrics); } struct fib_info *fib_create_info(struct fib_config *cfg, diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c new file mode 100644 index 0000000000000..5121c6475e6b0 --- /dev/null +++ b/net/ipv4/metrics.c @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include +#include + +int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, + u32 *metrics) +{ + bool ecn_ca = false; + struct nlattr *nla; + int remaining; + + if (!fc_mx) + return 0; + + nla_for_each_attr(nla, fc_mx, fc_mx_len, remaining) { + int type = nla_type(nla); + u32 val; + + if (!type) + continue; + if (type > RTAX_MAX) + return -EINVAL; + + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); + if (val == TCP_CA_UNSPEC) + return -EINVAL; + } else { + val = nla_get_u32(nla); + } + if (type == RTAX_ADVMSS && val > 65535 - 40) + val = 65535 - 40; + if (type == RTAX_MTU && val > 65535 - 15) + val = 65535 - 15; + if (type == RTAX_HOPLIMIT && val > 255) + val = 255; + if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) + return -EINVAL; + metrics[type - 1] = val; + } + + if (ecn_ca) + metrics[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + + return 0; +} +EXPORT_SYMBOL_GPL(ip_metrics_convert); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b2c0175125db2..a294e86a9b25a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -170,7 +170,7 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event); static int addrconf_ifdown(struct net_device *dev, int how); -static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, +static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, u32 flags, u32 noflags); @@ -916,7 +916,6 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) pr_warn("Freeing alive inet6 address %p\n", ifp); return; } - ip6_rt_put(ifp->rt); kfree_rcu(ifp, rcu); } @@ -995,7 +994,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC; struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -1037,7 +1036,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, goto out; } - rt = addrconf_dst_alloc(idev, addr, false); + rt = addrconf_dst_alloc(net, idev, addr, false, gfp_flags); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; @@ -1046,7 +1045,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, if (net->ipv6.devconf_all->disable_policy || idev->cnf.disable_policy) - rt->dst.flags |= DST_NOPOLICY; + rt->dst_nopolicy = true; neigh_parms_data_state_setall(idev->nd_parms); @@ -1102,8 +1101,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, inet6addr_notifier_call_chain(NETDEV_UP, ifa); out: if (unlikely(err < 0)) { - if (rt) - ip6_rt_put(rt); + fib6_info_release(rt); + if (ifa) { if (ifa->idev) in6_dev_put(ifa->idev); @@ -1179,7 +1178,7 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) static void cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) { - struct rt6_info *rt; + struct fib6_info *rt; rt = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, @@ -1187,11 +1186,11 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r 0, RTF_GATEWAY | RTF_DEFAULT); if (rt) { if (del_rt) - ip6_del_rt(rt); + ip6_del_rt(dev_net(ifp->idev->dev), rt); else { if (!(rt->rt6i_flags & RTF_EXPIRES)) - rt6_set_expires(rt, expires); - ip6_rt_put(rt); + fib6_set_expires(rt, expires); + fib6_info_release(rt); } } } @@ -2320,7 +2319,7 @@ static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, - unsigned long expires, u32 flags) + unsigned long expires, u32 flags, gfp_t gfp_flags) { struct fib6_config cfg = { .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, @@ -2331,6 +2330,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, .fc_flags = RTF_UP | flags, .fc_nlinfo.nl_net = dev_net(dev), .fc_protocol = RTPROT_KERNEL, + .fc_type = RTN_UNICAST, }; cfg.fc_dst = *pfx; @@ -2344,17 +2344,17 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, cfg.fc_flags |= RTF_NONEXTHOP; #endif - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, gfp_flags, NULL); } -static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, +static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, u32 flags, u32 noflags) { struct fib6_node *fn; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct fib6_table *table; u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; @@ -2368,14 +2368,13 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->dst.dev->ifindex != dev->ifindex) + if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & flags) != flags) continue; if ((rt->rt6i_flags & noflags) != 0) continue; - if (!dst_hold_safe(&rt->dst)) - rt = NULL; + fib6_info_hold(rt); break; } out: @@ -2394,12 +2393,13 @@ static void addrconf_add_mroute(struct net_device *dev) .fc_ifindex = dev->ifindex, .fc_dst_len = 8, .fc_flags = RTF_UP, + .fc_type = RTN_UNICAST, .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, GFP_ATOMIC, NULL); } static struct inet6_dev *addrconf_add_dev(struct net_device *dev) @@ -2641,7 +2641,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) */ if (pinfo->onlink) { - struct rt6_info *rt; + struct fib6_info *rt; unsigned long rt_expires; /* Avoid arithmetic overflow. Really, we could @@ -2666,13 +2666,13 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) if (rt) { /* Autoconf prefix route */ if (valid_lft == 0) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ - rt6_set_expires(rt, jiffies + rt_expires); + fib6_set_expires(rt, jiffies + rt_expires); } else { - rt6_clean_expires(rt); + fib6_clean_expires(rt); } } else if (valid_lft) { clock_t expires = 0; @@ -2683,9 +2683,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) expires = jiffies_to_clock_t(rt_expires); } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, expires, flags); + dev, expires, flags, GFP_ATOMIC); } - ip6_rt_put(rt); + fib6_info_release(rt); } /* Try to figure out our local address for this prefix */ @@ -2898,7 +2898,7 @@ static int inet6_addr_add(struct net *net, int ifindex, if (!IS_ERR(ifp)) { if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - expires, flags); + expires, flags, GFP_KERNEL); } /* Send a netlink notification if DAD is enabled and @@ -3051,7 +3051,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) if (addr.s6_addr32[3]) { add_addr(idev, &addr, plen, scope); - addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags); + addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags, + GFP_ATOMIC); return; } @@ -3076,7 +3077,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) add_addr(idev, &addr, plen, flag); addrconf_prefix_route(&addr, plen, idev->dev, 0, - pflags); + pflags, GFP_ATOMIC); } } } @@ -3116,7 +3117,8 @@ void addrconf_add_linklocal(struct inet6_dev *idev, ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL); if (!IS_ERR(ifp)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, + 0, 0, GFP_ATOMIC); addrconf_dad_start(ifp); in6_ifa_put(ifp); } @@ -3231,7 +3233,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) addrconf_add_linklocal(idev, &addr, IFA_F_STABLE_PRIVACY); else if (prefix_route) - addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + addrconf_prefix_route(&addr, 64, idev->dev, + 0, 0, GFP_KERNEL); break; case IN6_ADDR_GEN_MODE_EUI64: /* addrconf_add_linklocal also adds a prefix_route and we @@ -3241,7 +3244,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) - addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + addrconf_prefix_route(&addr, 64, idev->dev, + 0, 0, GFP_ATOMIC); break; case IN6_ADDR_GEN_MODE_NONE: default: @@ -3333,7 +3337,8 @@ static void addrconf_gre_config(struct net_device *dev) } #endif -static int fixup_permanent_addr(struct inet6_dev *idev, +static int fixup_permanent_addr(struct net *net, + struct inet6_dev *idev, struct inet6_ifaddr *ifp) { /* !rt6i_node means the host route was removed from the @@ -3341,9 +3346,10 @@ static int fixup_permanent_addr(struct inet6_dev *idev, * case regenerate the host route. */ if (!ifp->rt || !ifp->rt->rt6i_node) { - struct rt6_info *rt, *prev; + struct fib6_info *rt, *prev; - rt = addrconf_dst_alloc(idev, &ifp->addr, false); + rt = addrconf_dst_alloc(net, idev, &ifp->addr, false, + GFP_ATOMIC); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -3353,12 +3359,12 @@ static int fixup_permanent_addr(struct inet6_dev *idev, ifp->rt = rt; spin_unlock(&ifp->lock); - ip6_rt_put(prev); + fib6_info_release(prev); } if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, - idev->dev, 0, 0); + idev->dev, 0, 0, GFP_ATOMIC); } if (ifp->state == INET6_IFADDR_STATE_PREDAD) @@ -3367,7 +3373,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev, return 0; } -static void addrconf_permanent_addr(struct net_device *dev) +static void addrconf_permanent_addr(struct net *net, struct net_device *dev) { struct inet6_ifaddr *ifp, *tmp; struct inet6_dev *idev; @@ -3380,7 +3386,7 @@ static void addrconf_permanent_addr(struct net_device *dev) list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { if ((ifp->flags & IFA_F_PERMANENT) && - fixup_permanent_addr(idev, ifp) < 0) { + fixup_permanent_addr(net, idev, ifp) < 0) { write_unlock_bh(&idev->lock); in6_ifa_hold(ifp); ipv6_del_addr(ifp); @@ -3449,7 +3455,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (event == NETDEV_UP) { /* restore routes for permanent addresses */ - addrconf_permanent_addr(dev); + addrconf_permanent_addr(net, dev); if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ @@ -3707,7 +3713,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; bool keep; addrconf_del_dad_work(ifa); @@ -3735,7 +3741,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) spin_unlock_bh(&ifa->lock); if (rt) - ip6_del_rt(rt); + ip6_del_rt(net, rt); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -3853,6 +3859,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; bool bump_id, notify = false; + struct net *net; addrconf_join_solict(dev, &ifp->addr); @@ -3863,8 +3870,9 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) if (ifp->state == INET6_IFADDR_STATE_DEAD) goto out; + net = dev_net(dev); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 && + (net->ipv6.devconf_all->accept_dad < 1 && idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { @@ -3900,8 +3908,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) * Frames right away */ if (ifp->flags & IFA_F_OPTIMISTIC) { - ip6_ins_rt(ifp->rt); - if (ipv6_use_optimistic_addr(dev_net(dev), idev)) { + ip6_ins_rt(net, ifp->rt); + if (ipv6_use_optimistic_addr(net, idev)) { /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ @@ -4567,8 +4575,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, ipv6_ifa_notify(0, ifp); if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - expires, flags); + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, + ifp->idev->dev, expires, flags, + GFP_KERNEL); } else if (had_prefixroute) { enum cleanup_prefix_rt_t action; unsigned long rt_expires; @@ -5604,28 +5613,29 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * to do it again */ if (!rcu_access_pointer(ifp->rt->rt6i_node)) - ip6_ins_rt(ifp->rt); + ip6_ins_rt(net, ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); if (!ipv6_addr_any(&ifp->peer_addr)) addrconf_prefix_route(&ifp->peer_addr, 128, - ifp->idev->dev, 0, 0); + ifp->idev->dev, 0, 0, + GFP_KERNEL); break; case RTM_DELADDR: if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); if (!ipv6_addr_any(&ifp->peer_addr)) { - struct rt6_info *rt; + struct fib6_info *rt; rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, ifp->idev->dev, 0, 0); if (rt) - ip6_del_rt(rt); + ip6_del_rt(net, rt); } if (ifp->rt) { - if (dst_hold_safe(&ifp->rt->dst)) - ip6_del_rt(ifp->rt); + ip6_del_rt(net, ifp->rt); + ifp->rt = NULL; } rt_genid_bump_ipv6(net); break; @@ -5972,11 +5982,11 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) list_for_each_entry(ifa, &idev->addr_list, if_list) { spin_lock(&ifa->lock); if (ifa->rt) { - struct rt6_info *rt = ifa->rt; + struct fib6_info *rt = ifa->rt; int cpu; rcu_read_lock(); - addrconf_set_nopolicy(ifa->rt, val); + ifa->rt->dst_nopolicy = val ? true : false; if (rt->rt6i_pcpu) { for_each_possible_cpu(cpu) { struct rt6_info **rtp; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index bbcabbba9bd80..0e35657501a70 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -213,12 +213,12 @@ static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) { in6_dev_put(ac->aca_idev); - dst_release(&ac->aca_rt->dst); + fib6_info_release(ac->aca_rt); kfree(ac); } } -static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, +static struct ifacaddr6 *aca_alloc(struct fib6_info *rt, const struct in6_addr *addr) { struct inet6_dev *idev = rt->rt6i_idev; @@ -231,6 +231,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, aca->aca_addr = *addr; in6_dev_hold(idev); aca->aca_idev = idev; + fib6_info_hold(rt); aca->aca_rt = rt; aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ @@ -246,7 +247,8 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; - struct rt6_info *rt; + struct fib6_info *rt; + struct net *net; int err; ASSERT_RTNL(); @@ -265,14 +267,15 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) } } - rt = addrconf_dst_alloc(idev, addr, true); + net = dev_net(idev->dev); + rt = addrconf_dst_alloc(net, idev, addr, true, GFP_ATOMIC); if (IS_ERR(rt)) { err = PTR_ERR(rt); goto out; } aca = aca_alloc(rt, addr); if (!aca) { - ip6_rt_put(rt); + fib6_info_release(rt); err = -ENOMEM; goto out; } @@ -286,7 +289,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) aca_get(aca); write_unlock_bh(&idev->lock); - ip6_ins_rt(rt); + ip6_ins_rt(net, rt); addrconf_join_solict(idev->dev, &aca->aca_addr); @@ -328,8 +331,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) write_unlock_bh(&idev->lock); addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->dst); - ip6_del_rt(aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt); aca_put(aca); return 0; @@ -356,8 +358,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev) addrconf_leave_solict(idev, &aca->aca_addr); - dst_hold(&aca->aca_rt->dst); - ip6_del_rt(aca->aca_rt); + ip6_del_rt(dev_net(idev->dev), aca->aca_rt); aca_put(aca); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index deab2db6692eb..2ab49b7cac222 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -43,7 +43,7 @@ static struct kmem_cache *fib6_node_kmem __read_mostly; struct fib6_cleaner { struct fib6_walker w; struct net *net; - int (*func)(struct rt6_info *, void *arg); + int (*func)(struct fib6_info *, void *arg); int sernum; void *arg; }; @@ -54,7 +54,7 @@ struct fib6_cleaner { #define FWS_INIT FWS_L #endif -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, @@ -105,9 +105,8 @@ enum { FIB6_NO_SERNUM_CHANGE = 0, }; -void fib6_update_sernum(struct rt6_info *rt) +void fib6_update_sernum(struct net *net, struct fib6_info *rt) { - struct net *net = dev_net(rt->dst.dev); struct fib6_node *fn; fn = rcu_dereference_protected(rt->rt6i_node, @@ -146,6 +145,71 @@ static __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) +{ + struct fib6_info *f6i; + + f6i = kzalloc(sizeof(*f6i), gfp_flags); + if (!f6i) + return NULL; + + f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags); + if (!f6i->rt6i_pcpu) { + kfree(f6i); + return NULL; + } + + INIT_LIST_HEAD(&f6i->rt6i_siblings); + f6i->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; + + atomic_inc(&f6i->rt6i_ref); + + return f6i; +} + +void fib6_info_destroy(struct fib6_info *f6i) +{ + struct rt6_exception_bucket *bucket; + struct dst_metrics *m; + + WARN_ON(f6i->rt6i_node); + + bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); + if (bucket) { + f6i->rt6i_exception_bucket = NULL; + kfree(bucket); + } + + if (f6i->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } + } + + if (f6i->rt6i_idev) + in6_dev_put(f6i->rt6i_idev); + if (f6i->fib6_nh.nh_dev) + dev_put(f6i->fib6_nh.nh_dev); + + m = f6i->fib6_metrics; + if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) + kfree(m); + + kfree(f6i); +} +EXPORT_SYMBOL_GPL(fib6_info_destroy); + static struct fib6_node *node_alloc(struct net *net) { struct fib6_node *fn; @@ -176,28 +240,6 @@ static void node_free(struct net *net, struct fib6_node *fn) net->ipv6.rt6_stats->fib_nodes--; } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) -{ - int cpu; - - if (!non_pcpu_rt->rt6i_pcpu) - return; - - for_each_possible_cpu(cpu) { - struct rt6_info **ppcpu_rt; - struct rt6_info *pcpu_rt; - - ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); - pcpu_rt = *ppcpu_rt; - if (pcpu_rt) { - dst_dev_put(&pcpu_rt->dst); - dst_release(&pcpu_rt->dst); - *ppcpu_rt = NULL; - } - } -} -EXPORT_SYMBOL_GPL(rt6_free_pcpu); - static void fib6_free_table(struct fib6_table *table) { inetpeer_invalidate_tree(&table->tb6_peers); @@ -232,7 +274,7 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) if (table) { table->tb6_id = id; rcu_assign_pointer(table->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); } @@ -340,7 +382,7 @@ unsigned int fib6_tables_seq_read(struct net *net) static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, - struct rt6_info *rt) + struct fib6_info *rt) { struct fib6_entry_notifier_info info = { .rt = rt, @@ -351,7 +393,7 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, static int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, - struct rt6_info *rt, + struct fib6_info *rt, struct netlink_ext_ack *extack) { struct fib6_entry_notifier_info info = { @@ -368,16 +410,16 @@ struct fib6_dump_arg { struct notifier_block *nb; }; -static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg) +static void fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg) { - if (rt == arg->net->ipv6.ip6_null_entry) + if (rt == arg->net->ipv6.fib6_null_entry) return; call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); } static int fib6_node_dump(struct fib6_walker *w) { - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) fib6_rt_dump(rt, w->args); @@ -426,7 +468,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb) static int fib6_dump_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; for_each_fib6_walker_rt(w) { res = rt6_dump_route(rt, w->args); @@ -443,7 +485,7 @@ static int fib6_dump_node(struct fib6_walker *w) */ if (rt->rt6i_nsiblings) rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, + struct fib6_info, rt6i_siblings); } w->leaf = NULL; @@ -579,6 +621,24 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return res; } +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) +{ + if (!f6i) + return; + + if (f6i->fib6_metrics == &dst_default_metrics) { + struct dst_metrics *p = kzalloc(sizeof(*p), GFP_ATOMIC); + + if (!p) + return; + + refcount_set(&p->refcnt, 1); + f6i->fib6_metrics = p; + } + + f6i->fib6_metrics->metrics[metric - 1] = val; +} + /* * Routing Table * @@ -608,7 +668,7 @@ static struct fib6_node *fib6_add_1(struct net *net, fn = root; do { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); key = (struct rt6key *)((u8 *)leaf + offset); @@ -637,11 +697,11 @@ static struct fib6_node *fib6_add_1(struct net *net, /* clean up an intermediate node */ if (!(fn->fn_flags & RTN_RTINFO)) { RCU_INIT_POINTER(fn->leaf, NULL); - rt6_release(leaf); + fib6_info_release(leaf); /* remove null_entry in the root node */ } else if (fn->fn_flags & RTN_TL_ROOT && rcu_access_pointer(fn->leaf) == - net->ipv6.ip6_null_entry) { + net->ipv6.fib6_null_entry) { RCU_INIT_POINTER(fn->leaf, NULL); } @@ -802,39 +862,7 @@ static struct fib6_node *fib6_add_1(struct net *net, return ln; } -static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) -{ - int i; - - for (i = 0; i < RTAX_MAX; i++) { - if (test_bit(i, mxc->mx_valid)) - mp[i] = mxc->mx[i]; - } -} - -static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) -{ - if (!mxc->mx) - return 0; - - if (dst->flags & DST_HOST) { - u32 *mp = dst_metrics_write_ptr(dst); - - if (unlikely(!mp)) - return -ENOMEM; - - fib6_copy_metrics(mp, mxc); - } else { - dst_init_metrics(dst, mxc->mx, false); - - /* We've stolen mx now. */ - mxc->mx = NULL; - } - - return 0; -} - -static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, +static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, struct net *net) { struct fib6_table *table = rt->rt6i_table; @@ -847,18 +875,38 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * to still alive ones. */ while (fn) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_leaf; + struct fib6_info *new_leaf; if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { new_leaf = fib6_find_prefix(net, table, fn); atomic_inc(&new_leaf->rt6i_ref); + rcu_assign_pointer(fn->leaf, new_leaf); - rt6_release(rt); + fib6_info_release(rt); } fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&table->tb6_lock)); } + + if (rt->rt6i_pcpu) { + int cpu; + + /* release the reference to this fib entry from + * all of its cached pcpu routes + */ + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + fib6_info_release(pcpu_rt->from); + pcpu_rt->from = NULL; + } + } + } } } @@ -866,15 +914,15 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * Insert routing information in a node. */ -static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, +static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { - struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&rt->rt6i_table->tb6_lock)); - struct rt6_info *iter = NULL; - struct rt6_info __rcu **ins; - struct rt6_info __rcu **fallback_ins = NULL; + struct fib6_info *iter = NULL; + struct fib6_info __rcu **ins; + struct fib6_info __rcu **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || @@ -921,10 +969,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (!(iter->rt6i_flags & RTF_EXPIRES)) return -EEXIST; if (!(rt->rt6i_flags & RTF_EXPIRES)) - rt6_clean_expires(iter); + fib6_clean_expires(iter); else - rt6_set_expires(iter, rt->dst.expires); - iter->rt6i_pmtu = rt->rt6i_pmtu; + fib6_set_expires(iter, rt->expires); + fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu); return -EEXIST; } /* If we have the same destination and the same metric, @@ -965,7 +1013,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* Link this route to others same route. */ if (rt->rt6i_nsiblings) { unsigned int rt6i_nsiblings; - struct rt6_info *sibling, *temp_sibling; + struct fib6_info *sibling, *temp_sibling; /* Find the first route that have the same metric */ sibling = leaf; @@ -1003,9 +1051,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, add: nlflags |= NLM_F_CREATE; - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, @@ -1036,10 +1081,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, return -ENOENT; } - err = fib6_commit_metrics(&rt->dst, mxc); - if (err) - return err; - err = call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); @@ -1061,7 +1102,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - rt6_release(iter); + fib6_info_release(iter); if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ @@ -1077,7 +1118,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) fn->rr_ptr = NULL; - rt6_release(iter); + fib6_info_release(iter); nsiblings--; info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { @@ -1093,10 +1134,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, return 0; } -static void fib6_start_gc(struct net *net, struct rt6_info *rt) +static void fib6_start_gc(struct net *net, struct fib6_info *rt) { if (!timer_pending(&net->ipv6.ip6_fib_timer) && - (rt->rt6i_flags & (RTF_EXPIRES | RTF_CACHE))) + (rt->rt6i_flags & RTF_EXPIRES)) mod_timer(&net->ipv6.ip6_fib_timer, jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } @@ -1108,7 +1149,7 @@ void fib6_force_start_gc(struct net *net) jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } -static void __fib6_update_sernum_upto_root(struct rt6_info *rt, +static void __fib6_update_sernum_upto_root(struct fib6_info *rt, int sernum) { struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, @@ -1123,7 +1164,7 @@ static void __fib6_update_sernum_upto_root(struct rt6_info *rt, } } -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) { __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); } @@ -1135,9 +1176,8 @@ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) * Need to own table->tb6_lock */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack) +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->rt6i_table; struct fib6_node *fn, *pn = NULL; @@ -1146,11 +1186,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, int replace_required = 0; int sernum = fib6_new_sernum(info->nl_net); - if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) - return -EINVAL; - if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) - return -EINVAL; - if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) allow_create = 0; @@ -1162,7 +1197,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, fn = fib6_add_1(info->nl_net, table, root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, - offsetof(struct rt6_info, rt6i_dst), allow_create, + offsetof(struct fib6_info, rt6i_dst), allow_create, replace_required, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); @@ -1194,16 +1229,16 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!sfn) goto failure; - atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); + atomic_inc(&info->nl_net->ipv6.fib6_null_entry->rt6i_ref); rcu_assign_pointer(sfn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); sfn->fn_flags = RTN_ROOT; /* Now add the first leaf node to new subtree */ sn = fib6_add_1(info->nl_net, table, sfn, &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + offsetof(struct fib6_info, rt6i_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1222,7 +1257,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } else { sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn), &rt->rt6i_src.addr, rt->rt6i_src.plen, - offsetof(struct rt6_info, rt6i_src), + offsetof(struct fib6_info, rt6i_src), allow_create, replace_required, extack); if (IS_ERR(sn)) { @@ -1235,7 +1270,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (fn->fn_flags & RTN_TL_ROOT) { /* put back null_entry for root node */ rcu_assign_pointer(fn->leaf, - info->nl_net->ipv6.ip6_null_entry); + info->nl_net->ipv6.fib6_null_entry); } else { atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(fn->leaf, rt); @@ -1245,7 +1280,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, info, mxc, extack); + err = fib6_add_rt2node(fn, rt, info, extack); if (!err) { __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); @@ -1259,13 +1294,13 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, * super-tree leaf node we have to find a new one for it. */ if (pn != fn) { - struct rt6_info *pn_leaf = + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); if (pn_leaf == rt) { pn_leaf = NULL; RCU_INIT_POINTER(pn->leaf, NULL); - atomic_dec(&rt->rt6i_ref); + fib6_info_release(rt); } if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { pn_leaf = fib6_find_prefix(info->nl_net, table, @@ -1274,10 +1309,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!pn_leaf) { WARN_ON(!pn_leaf); pn_leaf = - info->nl_net->ipv6.ip6_null_entry; + info->nl_net->ipv6.fib6_null_entry; } #endif - atomic_inc(&pn_leaf->rt6i_ref); + fib6_info_hold(pn_leaf); rcu_assign_pointer(pn->leaf, pn_leaf); } } @@ -1299,10 +1334,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, (fn->fn_flags & RTN_TL_ROOT && !rcu_access_pointer(fn->leaf)))) fib6_repair_tree(info->nl_net, table, fn); - /* Always release dst as dst->__refcnt is guaranteed - * to be taken before entering this function - */ - dst_release_immediate(&rt->dst); return err; } @@ -1312,7 +1343,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, */ struct lookup_args { - int offset; /* key offset on rt6_info */ + int offset; /* key offset on fib6_info */ const struct in6_addr *addr; /* search key */ }; @@ -1350,7 +1381,7 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, struct fib6_node *subtree = FIB6_SUBTREE(fn); if (subtree || fn->fn_flags & RTN_RTINFO) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; if (!leaf) @@ -1390,12 +1421,12 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad struct fib6_node *fn; struct lookup_args args[] = { { - .offset = offsetof(struct rt6_info, rt6i_dst), + .offset = offsetof(struct fib6_info, rt6i_dst), .addr = daddr, }, #ifdef CONFIG_IPV6_SUBTREES { - .offset = offsetof(struct rt6_info, rt6i_src), + .offset = offsetof(struct fib6_info, rt6i_src), .addr = saddr, }, #endif @@ -1431,7 +1462,7 @@ static struct fib6_node *fib6_locate_1(struct fib6_node *root, struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; /* This node is being deleted */ @@ -1480,7 +1511,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, - offsetof(struct rt6_info, rt6i_dst), + offsetof(struct fib6_info, rt6i_dst), exact_match); #ifdef CONFIG_IPV6_SUBTREES @@ -1491,7 +1522,7 @@ struct fib6_node *fib6_locate(struct fib6_node *root, if (subtree) { fn = fib6_locate_1(subtree, saddr, src_len, - offsetof(struct rt6_info, rt6i_src), + offsetof(struct fib6_info, rt6i_src), exact_match); } } @@ -1510,14 +1541,14 @@ struct fib6_node *fib6_locate(struct fib6_node *root, * */ -static struct rt6_info *fib6_find_prefix(struct net *net, +static struct fib6_info *fib6_find_prefix(struct net *net, struct fib6_table *table, struct fib6_node *fn) { struct fib6_node *child_left, *child_right; if (fn->fn_flags & RTN_ROOT) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; while (fn) { child_left = rcu_dereference_protected(fn->left, @@ -1554,7 +1585,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, /* Set fn->leaf to null_entry for root node. */ if (fn->fn_flags & RTN_TL_ROOT) { - rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry); + rcu_assign_pointer(fn->leaf, net->ipv6.fib6_null_entry); return fn; } @@ -1569,11 +1600,11 @@ static struct fib6_node *fib6_repair_tree(struct net *net, lockdep_is_held(&table->tb6_lock)); struct fib6_node *pn_l = rcu_dereference_protected(pn->left, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf, + struct fib6_info *fn_leaf = rcu_dereference_protected(fn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + struct fib6_info *pn_leaf = rcu_dereference_protected(pn->leaf, lockdep_is_held(&table->tb6_lock)); - struct rt6_info *new_fn_leaf; + struct fib6_info *new_fn_leaf; RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; @@ -1599,10 +1630,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, #if RT6_DEBUG >= 2 if (!new_fn_leaf) { WARN_ON(!new_fn_leaf); - new_fn_leaf = net->ipv6.ip6_null_entry; + new_fn_leaf = net->ipv6.fib6_null_entry; } #endif - atomic_inc(&new_fn_leaf->rt6i_ref); + fib6_info_hold(new_fn_leaf); rcu_assign_pointer(fn->leaf, new_fn_leaf); return pn; } @@ -1658,23 +1689,21 @@ static struct fib6_node *fib6_repair_tree(struct net *net, return pn; RCU_INIT_POINTER(pn->leaf, NULL); - rt6_release(pn_leaf); + fib6_info_release(pn_leaf); fn = pn; } } static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, - struct rt6_info __rcu **rtp, struct nl_info *info) + struct fib6_info __rcu **rtp, struct nl_info *info) { struct fib6_walker *w; - struct rt6_info *rt = rcu_dereference_protected(*rtp, + struct fib6_info *rt = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); - WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); - /* Unlink it */ *rtp = rt->rt6_next; rt->rt6i_node = NULL; @@ -1690,7 +1719,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, /* Remove this entry from other siblings */ if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + struct fib6_info *sibling, *next_sibling; list_for_each_entry_safe(sibling, next_sibling, &rt->rt6i_siblings, rt6i_siblings) @@ -1730,40 +1759,30 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); - rt6_release(rt); + fib6_info_release(rt); } /* Need to own table->tb6_lock */ -int fib6_del(struct rt6_info *rt, struct nl_info *info) +int fib6_del(struct fib6_info *rt, struct nl_info *info) { struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, lockdep_is_held(&rt->rt6i_table->tb6_lock)); struct fib6_table *table = rt->rt6i_table; struct net *net = info->nl_net; - struct rt6_info __rcu **rtp; - struct rt6_info __rcu **rtp_next; + struct fib6_info __rcu **rtp; + struct fib6_info __rcu **rtp_next; -#if RT6_DEBUG >= 2 - if (rt->dst.obsolete > 0) { - WARN_ON(fn); - return -ENOENT; - } -#endif - if (!fn || rt == net->ipv6.ip6_null_entry) + if (!fn || rt == net->ipv6.fib6_null_entry) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - /* remove cached dst from exception table */ - if (rt->rt6i_flags & RTF_CACHE) - return rt6_remove_exception_rt(rt); - /* * Walk the leaf entries looking for ourself */ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) { - struct rt6_info *cur = rcu_dereference_protected(*rtp, + struct fib6_info *cur = rcu_dereference_protected(*rtp, lockdep_is_held(&table->tb6_lock)); if (rt == cur) { fib6_del_route(table, fn, rtp, info); @@ -1907,7 +1926,7 @@ static int fib6_walk(struct net *net, struct fib6_walker *w) static int fib6_clean_node(struct fib6_walker *w) { int res; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_cleaner *c = container_of(w, struct fib6_cleaner, w); struct nl_info info = { .nl_net = c->net, @@ -1942,7 +1961,7 @@ static int fib6_clean_node(struct fib6_walker *w) if (WARN_ON(!rt->rt6i_nsiblings)) continue; rt = list_last_entry(&rt->rt6i_siblings, - struct rt6_info, rt6i_siblings); + struct fib6_info, rt6i_siblings); continue; } WARN_ON(res != 0); @@ -1961,7 +1980,7 @@ static int fib6_clean_node(struct fib6_walker *w) */ static void fib6_clean_tree(struct net *net, struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), + int (*func)(struct fib6_info *, void *arg), int sernum, void *arg) { struct fib6_cleaner c; @@ -1979,7 +1998,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } static void __fib6_clean_all(struct net *net, - int (*func)(struct rt6_info *, void *), + int (*func)(struct fib6_info *, void *), int sernum, void *arg) { struct fib6_table *table; @@ -1999,7 +2018,7 @@ static void __fib6_clean_all(struct net *net, rcu_read_unlock(); } -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *), void *arg) { __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); @@ -2016,7 +2035,7 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -static int fib6_age(struct rt6_info *rt, void *arg) +static int fib6_age(struct fib6_info *rt, void *arg) { struct fib6_gc_args *gc_args = arg; unsigned long now = jiffies; @@ -2026,8 +2045,8 @@ static int fib6_age(struct rt6_info *rt, void *arg) * Routes are expired even if they are in use. */ - if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { - if (time_after(now, rt->dst.expires)) { + if (rt->rt6i_flags & RTF_EXPIRES && rt->expires) { + if (time_after(now, rt->expires)) { RT6_TRACE("expiring %p\n", rt); return -1; } @@ -2110,7 +2129,7 @@ static int __net_init fib6_net_init(struct net *net) net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); @@ -2122,7 +2141,7 @@ static int __net_init fib6_net_init(struct net *net) goto out_fib6_main_tbl; net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf, - net->ipv6.ip6_null_entry); + net->ipv6.fib6_null_entry); net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); @@ -2220,8 +2239,9 @@ struct ipv6_route_iter { static int ipv6_route_seq_show(struct seq_file *seq, void *v) { - struct rt6_info *rt = v; + struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + const struct net_device *dev; seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); @@ -2231,14 +2251,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_flags & RTF_GATEWAY) - seq_printf(seq, "%pi6", &rt->rt6i_gateway); + seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); else seq_puts(seq, "00000000000000000000000000000000"); + dev = rt->fib6_nh.nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), - rt->dst.__use, rt->rt6i_flags, - rt->dst.dev ? rt->dst.dev->name : ""); + rt->rt6i_metric, atomic_read(&rt->rt6i_ref), 0, + rt->rt6i_flags, dev ? dev->name : ""); iter->w.leaf = NULL; return 0; } @@ -2311,14 +2331,14 @@ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int r; - struct rt6_info *n; + struct fib6_info *n; struct net *net = seq_file_net(seq); struct ipv6_route_iter *iter = seq->private; if (!v) goto iter_table; - n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next); + n = rcu_dereference_bh(((struct fib6_info *)v)->rt6_next); if (n) { ++*pos; return n; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a39b04f9fa6e5..3db47986ef386 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -968,7 +968,8 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, if (!had_dst) *dst = ip6_route_output(net, sk, fl6); rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; - err = ip6_route_get_saddr(net, rt, &fl6->daddr, + err = ip6_route_get_saddr(net, rt ? rt->from : NULL, + &fl6->daddr, sk ? inet6_sk(sk)->srcprefs : 0, &fl6->saddr); if (err) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 9de4dfb126ba0..1026452986928 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1155,7 +1155,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); struct neighbour *neigh = NULL; struct inet6_dev *in6_dev; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; + struct net *net; int lifetime; struct ndisc_options ndopts; int optlen; @@ -1253,9 +1254,9 @@ static void ndisc_router_discovery(struct sk_buff *skb) /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. */ + net = dev_net(in6_dev->dev); if (!in6_dev->cnf.accept_ra_from_local && - ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, - in6_dev->dev, 0)) { + ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); @@ -1272,20 +1273,22 @@ static void ndisc_router_discovery(struct sk_buff *skb) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif - rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev); + rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { - neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); + neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, + rt->fib6_nh.nh_dev, NULL, + &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - ip6_rt_put(rt); + fib6_info_release(rt); return; } } if (rt && lifetime == 0) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } @@ -1294,7 +1297,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); - rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); + rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, + skb->dev, pref); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", @@ -1302,12 +1306,14 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); + neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, + rt->fib6_nh.nh_dev, NULL, + &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); - ip6_rt_put(rt); + fib6_info_release(rt); return; } neigh->flags |= NTF_ROUTER; @@ -1316,14 +1322,13 @@ static void ndisc_router_discovery(struct sk_buff *skb) } if (rt) - rt6_set_expires(rt, jiffies + (HZ * lifetime)); + fib6_set_expires(rt, jiffies + (HZ * lifetime)); if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && ra_msg->icmph.icmp6_hop_limit) { if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; - if (rt) - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, - ra_msg->icmph.icmp6_hop_limit); + fib6_metric_set(rt, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } else { ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } @@ -1475,10 +1480,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (in6_dev->cnf.mtu6 != mtu) { in6_dev->cnf.mtu6 = mtu; - - if (rt) - dst_metric_set(&rt->dst, RTAX_MTU, mtu); - + fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } } @@ -1497,7 +1499,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: - ip6_rt_put(rt); + fib6_info_release(rt); if (neigh) neigh_release(neigh); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1d738bfe893bc..f9c363327d620 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -78,7 +78,6 @@ enum rt6_nud_state { RT6_NUD_SUCCEED = 1 }; -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); @@ -97,25 +96,24 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static void rt6_dst_from_metrics_check(struct rt6_info *rt); -static int rt6_score_route(struct rt6_info *rt, int oif, int strict); -static size_t rt6_nlmsg_size(struct rt6_info *rt); -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_score_route(struct fib6_info *rt, int oif, int strict); +static size_t rt6_nlmsg_size(struct fib6_info *rt); +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev); @@ -184,29 +182,10 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) } } -static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) -{ - return dst_metrics_write_ptr(&rt->from->dst); -} - -static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) -{ - struct rt6_info *rt = (struct rt6_info *)dst; - - if (rt->rt6i_flags & RTF_PCPU) - return rt6_pcpu_cow_metrics(rt); - else if (rt->rt6i_flags & RTF_CACHE) - return NULL; - else - return dst_cow_metrics_generic(dst, old); -} - -static inline const void *choose_neigh_daddr(struct rt6_info *rt, +static inline const void *choose_neigh_daddr(const struct in6_addr *p, struct sk_buff *skb, const void *daddr) { - struct in6_addr *p = &rt->rt6i_gateway; - if (!ipv6_addr_any(p)) return (const void *) p; else if (skb) @@ -214,18 +193,27 @@ static inline const void *choose_neigh_daddr(struct rt6_info *rt, return daddr; } -static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, - struct sk_buff *skb, - const void *daddr) +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, + struct sk_buff *skb, + const void *daddr) { - struct rt6_info *rt = (struct rt6_info *) dst; struct neighbour *n; - daddr = choose_neigh_daddr(rt, skb, daddr); - n = __ipv6_neigh_lookup(dst->dev, daddr); + daddr = choose_neigh_daddr(gw, skb, daddr); + n = __ipv6_neigh_lookup(dev, daddr); if (n) return n; - return neigh_create(&nd_tbl, daddr, dst->dev); + return neigh_create(&nd_tbl, daddr, dev); +} + +static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) +{ + const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); + + return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); } static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) @@ -233,7 +221,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) struct net_device *dev = dst->dev; struct rt6_info *rt = (struct rt6_info *)dst; - daddr = choose_neigh_daddr(rt, NULL, daddr); + daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr); if (!daddr) return; if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -250,7 +238,7 @@ static struct dst_ops ip6_dst_ops_template = { .check = ip6_dst_check, .default_advmss = ip6_default_advmss, .mtu = ip6_mtu, - .cow_metrics = ipv6_cow_metrics, + .cow_metrics = dst_cow_metrics_generic, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -258,7 +246,7 @@ static struct dst_ops ip6_dst_ops_template = { .update_pmtu = ip6_rt_update_pmtu, .redirect = rt6_do_redirect, .local_out = __ip6_local_out, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, .confirm_neigh = ip6_confirm_neigh, }; @@ -288,13 +276,22 @@ static struct dst_ops ip6_dst_blackhole_ops = { .update_pmtu = ip6_rt_blackhole_update_pmtu, .redirect = ip6_rt_blackhole_redirect, .cow_metrics = dst_cow_metrics_generic, - .neigh_lookup = ip6_neigh_lookup, + .neigh_lookup = ip6_dst_neigh_lookup, }; static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 0, }; +static const struct fib6_info fib6_null_entry_template = { + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_protocol = RTPROT_KERNEL, + .rt6i_metric = ~(u32)0, + .rt6i_ref = ATOMIC_INIT(1), + .fib6_type = RTN_UNREACHABLE, + .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, +}; + static const struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -305,9 +302,6 @@ static const struct rt6_info ip6_null_entry_template = { .output = ip6_pkt_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -322,9 +316,6 @@ static const struct rt6_info ip6_prohibit_entry_template = { .output = ip6_pkt_prohibit_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; static const struct rt6_info ip6_blk_hole_entry_template = { @@ -337,9 +328,6 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .output = dst_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_protocol = RTPROT_KERNEL, - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), }; #endif @@ -349,14 +337,12 @@ static void rt6_info_init(struct rt6_info *rt) struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - INIT_LIST_HEAD(&rt->rt6i_siblings); INIT_LIST_HEAD(&rt->rt6i_uncached); } /* allocate dst with ip6_dst_ops */ -static struct rt6_info *__ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, flags); @@ -368,34 +354,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, return rt; } - -struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags) -{ - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); - - if (rt) { - rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); - if (!rt->rt6i_pcpu) { - dst_release_immediate(&rt->dst); - return NULL; - } - } - - return rt; -} EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; - struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct inet6_dev *idev; dst_destroy_metrics_generic(dst); - free_percpu(rt->rt6i_pcpu); rt6_uncached_list_del(rt); idev = rt->rt6i_idev; @@ -403,14 +370,9 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); - if (bucket) { - rt->rt6i_exception_bucket = NULL; - kfree(bucket); - } rt->from = NULL; - dst_release(&from->dst); + fib6_info_release(from); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -445,18 +407,18 @@ static bool rt6_check_expired(const struct rt6_info *rt) return true; } else if (rt->from) { return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || - rt6_check_expired(rt->from); + fib6_check_expired(rt->from); } return false; } -static struct rt6_info *rt6_multipath_select(const struct net *net, - struct rt6_info *match, +static struct fib6_info *rt6_multipath_select(const struct net *net, + struct fib6_info *match, struct flowi6 *fl6, int oif, const struct sk_buff *skb, int strict) { - struct rt6_info *sibling, *next_sibling; + struct fib6_info *sibling, *next_sibling; /* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. @@ -464,12 +426,15 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); - if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) return match; list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, rt6i_siblings) { - if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) + int nh_upper_bound; + + nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + if (fl6->mp_hash > nh_upper_bound) continue; if (rt6_score_route(sibling, oif, strict) < 0) break; @@ -484,22 +449,23 @@ static struct rt6_info *rt6_multipath_select(const struct net *net, * Route lookup. rcu_read_lock() should be held. */ -static inline struct rt6_info *rt6_device_match(struct net *net, - struct rt6_info *rt, +static inline struct fib6_info *rt6_device_match(struct net *net, + struct fib6_info *rt, const struct in6_addr *saddr, int oif, int flags) { - struct rt6_info *local = NULL; - struct rt6_info *sprt; + struct fib6_info *local = NULL; + struct fib6_info *sprt; - if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) + if (!oif && ipv6_addr_any(saddr) && + !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) return rt; for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { - struct net_device *dev = sprt->dst.dev; + const struct net_device *dev = sprt->fib6_nh.nh_dev; - if (sprt->rt6i_nh_flags & RTNH_F_DEAD) + if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; if (oif) { @@ -528,10 +494,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net, return local; if (flags & RT6_LOOKUP_F_IFACE) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; } - return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; + return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -553,10 +519,13 @@ static void rt6_probe_deferred(struct work_struct *w) kfree(work); } -static void rt6_probe(struct rt6_info *rt) +static void rt6_probe(struct fib6_info *rt) { struct __rt6_probe_work *work; + const struct in6_addr *nh_gw; struct neighbour *neigh; + struct net_device *dev; + /* * Okay, this does not seem to be appropriate * for now, however, we need to check if it @@ -567,8 +536,11 @@ static void rt6_probe(struct rt6_info *rt) */ if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) return; + + nh_gw = &rt->fib6_nh.nh_gw; + dev = rt->fib6_nh.nh_dev; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); if (neigh) { if (neigh->nud_state & NUD_VALID) goto out; @@ -590,9 +562,9 @@ static void rt6_probe(struct rt6_info *rt) if (work) { INIT_WORK(&work->work, rt6_probe_deferred); - work->target = rt->rt6i_gateway; - dev_hold(rt->dst.dev); - work->dev = rt->dst.dev; + work->target = *nh_gw; + dev_hold(dev); + work->dev = dev; schedule_work(&work->work); } @@ -600,7 +572,7 @@ static void rt6_probe(struct rt6_info *rt) rcu_read_unlock_bh(); } #else -static inline void rt6_probe(struct rt6_info *rt) +static inline void rt6_probe(struct fib6_info *rt) { } #endif @@ -608,9 +580,10 @@ static inline void rt6_probe(struct rt6_info *rt) /* * Default Router Selection (RFC 2461 6.3.6) */ -static inline int rt6_check_dev(struct rt6_info *rt, int oif) +static inline int rt6_check_dev(struct fib6_info *rt, int oif) { - struct net_device *dev = rt->dst.dev; + const struct net_device *dev = rt->fib6_nh.nh_dev; + if (!oif || dev->ifindex == oif) return 2; if ((dev->flags & IFF_LOOPBACK) && @@ -619,17 +592,18 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) { - struct neighbour *neigh; enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; + struct neighbour *neigh; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) return RT6_NUD_SUCCEED; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); + neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, + &rt->fib6_nh.nh_gw); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@ -650,8 +624,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) return ret; } -static int rt6_score_route(struct rt6_info *rt, int oif, - int strict) +static int rt6_score_route(struct fib6_info *rt, int oif, int strict) { int m; @@ -669,23 +642,23 @@ static int rt6_score_route(struct rt6_info *rt, int oif, return m; } -static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match, +static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, + int *mpri, struct fib6_info *match, bool *do_rr) { int m; bool match_do_rr = false; struct inet6_dev *idev = rt->rt6i_idev; - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) goto out; if (idev->cnf.ignore_routes_with_linkdown && - rt->rt6i_nh_flags & RTNH_F_LINKDOWN && + rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); @@ -709,13 +682,13 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, return match; } -static struct rt6_info *find_rr_leaf(struct fib6_node *fn, - struct rt6_info *leaf, - struct rt6_info *rr_head, +static struct fib6_info *find_rr_leaf(struct fib6_node *fn, + struct fib6_info *leaf, + struct fib6_info *rr_head, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match, *cont; + struct fib6_info *rt, *match, *cont; int mpri = -1; match = NULL; @@ -748,16 +721,16 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, return match; } -static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, +static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, int oif, int strict) { - struct rt6_info *leaf = rcu_dereference(fn->leaf); - struct rt6_info *match, *rt0; + struct fib6_info *leaf = rcu_dereference(fn->leaf); + struct fib6_info *match, *rt0; bool do_rr = false; int key_plen; - if (!leaf || leaf == net->ipv6.ip6_null_entry) - return net->ipv6.ip6_null_entry; + if (!leaf || leaf == net->ipv6.fib6_null_entry) + return net->ipv6.fib6_null_entry; rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) @@ -774,13 +747,13 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, key_plen = rt0->rt6i_src.plen; #endif if (fn->fn_bit != key_plen) - return net->ipv6.ip6_null_entry; + return net->ipv6.fib6_null_entry; match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, &do_rr); if (do_rr) { - struct rt6_info *next = rcu_dereference(rt0->rt6_next); + struct fib6_info *next = rcu_dereference(rt0->rt6_next); /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) @@ -795,10 +768,10 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, } } - return match ? match : net->ipv6.ip6_null_entry; + return match ? match : net->ipv6.fib6_null_entry; } -static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) +static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) { return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); } @@ -812,7 +785,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct in6_addr prefix_buf, *prefix; unsigned int pref; unsigned long lifetime; - struct rt6_info *rt; + struct fib6_info *rt; if (len < sizeof(struct route_info)) { return -EINVAL; @@ -850,13 +823,13 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (rinfo->prefix_len == 0) - rt = rt6_get_dflt_router(gwaddr, dev); + rt = rt6_get_dflt_router(net, gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev); if (rt && !lifetime) { - ip6_del_rt(rt); + ip6_del_rt(net, rt); rt = NULL; } @@ -869,16 +842,157 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (rt) { if (!addrconf_finite_timeout(lifetime)) - rt6_clean_expires(rt); + fib6_clean_expires(rt); else - rt6_set_expires(rt, jiffies + HZ * lifetime); + fib6_set_expires(rt, jiffies + HZ * lifetime); - ip6_rt_put(rt); + fib6_info_release(rt); } return 0; } #endif +/* + * Misc support functions + */ + +/* called with rcu_lock held */ +static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) +{ + struct net_device *dev = rt->fib6_nh.nh_dev; + + if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { + /* for copies of local routes, dst->dev needs to be the + * device if it is a master device, the master device if + * device is enslaved, and the loopback as the default + */ + if (netif_is_l3_slave(dev) && + !rt6_need_strict(&rt->rt6i_dst.addr)) + dev = l3mdev_master_dev_rcu(dev); + else if (!netif_is_l3_master(dev)) + dev = dev_net(dev)->loopback_dev; + /* last case is netif_is_l3_master(dev) is true in which + * case we want dev returned to be dev + */ + } + + return dev; +} + +static const int fib6_prop[RTN_MAX + 1] = { + [RTN_UNSPEC] = 0, + [RTN_UNICAST] = 0, + [RTN_LOCAL] = 0, + [RTN_BROADCAST] = 0, + [RTN_ANYCAST] = 0, + [RTN_MULTICAST] = 0, + [RTN_BLACKHOLE] = -EINVAL, + [RTN_UNREACHABLE] = -EHOSTUNREACH, + [RTN_PROHIBIT] = -EACCES, + [RTN_THROW] = -EAGAIN, + [RTN_NAT] = -EINVAL, + [RTN_XRESOLVE] = -EINVAL, +}; + +static int ip6_rt_type_to_error(u8 fib6_type) +{ + return fib6_prop[fib6_type]; +} + +static unsigned short fib6_info_dst_flags(struct fib6_info *rt) +{ + unsigned short flags = 0; + + if (rt->dst_nocount) + flags |= DST_NOCOUNT; + if (rt->dst_nopolicy) + flags |= DST_NOPOLICY; + if (rt->dst_host) + flags |= DST_HOST; + + return flags; +} + +static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +{ + rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + + switch (ort->fib6_type) { + case RTN_BLACKHOLE: + rt->dst.output = dst_discard_out; + rt->dst.input = dst_discard; + break; + case RTN_PROHIBIT: + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; + break; + case RTN_THROW: + case RTN_UNREACHABLE: + default: + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; + break; + } +} + +static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) +{ + rt->dst.flags |= fib6_info_dst_flags(ort); + + if (ort->rt6i_flags & RTF_REJECT) { + ip6_rt_init_dst_reject(rt, ort); + return; + } + + rt->dst.error = 0; + rt->dst.output = ip6_output; + + if (ort->fib6_type == RTN_LOCAL) { + rt->dst.input = ip6_input; + } else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) { + rt->dst.input = ip6_mc_input; + } else { + rt->dst.input = ip6_forward; + } + + if (ort->fib6_nh.nh_lwtstate) { + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + lwtunnel_set_redirect(&rt->dst); + } + + rt->dst.lastuse = jiffies; +} + +static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) +{ + rt->rt6i_flags &= ~RTF_EXPIRES; + fib6_info_hold(from); + rt->from = from; + dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); + if (from->fib6_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&from->fib6_metrics->refcnt); + } +} + +static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) +{ + ip6_rt_init_dst(rt, ort); + + rt->rt6i_dst = ort->rt6i_dst; + rt->rt6i_idev = ort->rt6i_idev; + if (rt->rt6i_idev) + in6_dev_hold(rt->rt6i_idev); + rt->rt6i_gateway = ort->fib6_nh.nh_gw; + rt->rt6i_flags = ort->rt6i_flags; + rt6_set_from(rt, ort); +#ifdef CONFIG_IPV6_SUBTREES + rt->rt6i_src = ort->rt6i_src; +#endif + rt->rt6i_prefsrc = ort->rt6i_prefsrc; + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); +} + static struct fib6_node* fib6_backtrack(struct fib6_node *fn, struct in6_addr *saddr) { @@ -914,14 +1028,29 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, return false; } +/* called with rcu_lock held */ +static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) +{ + unsigned short flags = fib6_info_dst_flags(rt); + struct net_device *dev = rt->fib6_nh.nh_dev; + struct rt6_info *nrt; + + nrt = ip6_dst_alloc(dev_net(dev), dev, flags); + if (nrt) + ip6_rt_copy_init(nrt, rt); + + return nrt; +} + static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { - struct rt6_info *rt, *rt_cache; + struct fib6_info *f6i; struct fib6_node *fn; + struct rt6_info *rt; if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) flags &= ~RT6_LOOKUP_F_IFACE; @@ -929,35 +1058,43 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net, rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = rcu_dereference(fn->leaf); - if (!rt) { - rt = net->ipv6.ip6_null_entry; + f6i = rcu_dereference(fn->leaf); + if (!f6i) { + f6i = net->ipv6.fib6_null_entry; } else { - rt = rt6_device_match(net, rt, &fl6->saddr, + f6i = rt6_device_match(net, f6i, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif, - skb, flags); + if (f6i->rt6i_nsiblings && fl6->flowi6_oif == 0) + f6i = rt6_multipath_select(net, f6i, fl6, + fl6->flowi6_oif, skb, flags); } - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } - /* Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - if (ip6_hold_safe(net, &rt, true)) - dst_use_noref(&rt->dst, jiffies); + /* Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, &rt, true)) + dst_use_noref(&rt->dst, jiffies); + } else if (f6i == net->ipv6.fib6_null_entry) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } else { + rt = ip6_create_rt_rcu(f6i); + if (!rt) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } + } rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, @@ -999,8 +1136,7 @@ EXPORT_SYMBOL(rt6_lookup); * Caller must hold dst before calling it. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct mx6_config *mxc, +static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info, struct netlink_ext_ack *extack) { int err; @@ -1008,46 +1144,20 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, table = rt->rt6i_table; spin_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mxc, extack); + err = fib6_add(&table->tb6_root, rt, info, extack); spin_unlock_bh(&table->tb6_lock); return err; } -int ip6_ins_rt(struct rt6_info *rt) -{ - struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - struct mx6_config mxc = { .mx = NULL, }; - - /* Hold dst to account for the reference from the fib6 tree */ - dst_hold(&rt->dst); - return __ip6_ins_rt(rt, &info, &mxc, NULL); -} - -/* called with rcu_lock held */ -static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) +int ip6_ins_rt(struct net *net, struct fib6_info *rt) { - struct net_device *dev = rt->dst.dev; + struct nl_info info = { .nl_net = net, }; - if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { - /* for copies of local routes, dst->dev needs to be the - * device if it is a master device, the master device if - * device is enslaved, and the loopback as the default - */ - if (netif_is_l3_slave(dev) && - !rt6_need_strict(&rt->rt6i_dst.addr)) - dev = l3mdev_master_dev_rcu(dev); - else if (!netif_is_l3_master(dev)) - dev = dev_net(dev)->loopback_dev; - /* last case is netif_is_l3_master(dev) is true in which - * case we want dev returned to be dev - */ - } - - return dev; + return __ip6_ins_rt(rt, &info, NULL); } -static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, +static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -1058,19 +1168,15 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, * Clone the route. */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - rcu_read_lock(); dev = ip6_rt_get_dev_rcu(ort); - rt = __ip6_dst_alloc(dev_net(dev), dev, 0); + rt = ip6_dst_alloc(dev_net(dev), dev, 0); rcu_read_unlock(); if (!rt) return NULL; ip6_rt_copy_init(rt, ort); rt->rt6i_flags |= RTF_CACHE; - rt->rt6i_metric = 0; rt->dst.flags |= DST_HOST; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128; @@ -1090,45 +1196,44 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, return rt; } -static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) +static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) { + unsigned short flags = fib6_info_dst_flags(rt); struct net_device *dev; struct rt6_info *pcpu_rt; rcu_read_lock(); dev = ip6_rt_get_dev_rcu(rt); - pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); + pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); rcu_read_unlock(); if (!pcpu_rt) return NULL; ip6_rt_copy_init(pcpu_rt, rt); - pcpu_rt->rt6i_protocol = rt->rt6i_protocol; pcpu_rt->rt6i_flags |= RTF_PCPU; return pcpu_rt; } /* It should be called with rcu_read_lock() acquired */ -static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) { struct rt6_info *pcpu_rt, **p; p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) - rt6_dst_from_metrics_check(pcpu_rt); + if (pcpu_rt) + ip6_hold_safe(NULL, &pcpu_rt, false); return pcpu_rt; } -static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) +static struct rt6_info *rt6_make_pcpu_route(struct net *net, + struct fib6_info *rt) { struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); if (!pcpu_rt) { - struct net *net = dev_net(rt->dst.dev); - dst_hold(&net->ipv6.ip6_null_entry->dst); return net->ipv6.ip6_null_entry; } @@ -1138,7 +1243,6 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev); - rt6_dst_from_metrics_check(pcpu_rt); return pcpu_rt; } @@ -1158,9 +1262,8 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, return; net = dev_net(rt6_ex->rt6i->dst.dev); - rt6_ex->rt6i->rt6i_node = NULL; hlist_del_rcu(&rt6_ex->hlist); - rt6_release(rt6_ex->rt6i); + dst_release(&rt6_ex->rt6i->dst); kfree_rcu(rt6_ex, rcu); WARN_ON_ONCE(!bucket->depth); bucket->depth--; @@ -1268,20 +1371,25 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, return NULL; } +static unsigned int fib6_mtu(const struct fib6_info *rt) +{ + unsigned int mtu; + + mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6; + mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + + return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); +} + static int rt6_insert_exception(struct rt6_info *nrt, - struct rt6_info *ort) + struct fib6_info *ort) { - struct net *net = dev_net(ort->dst.dev); + struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; int err = 0; - /* ort can't be a cache or pcpu route */ - if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) - ort = ort->from; - WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); - spin_lock_bh(&rt6_exception_lock); if (ort->exception_bucket_flushed) { @@ -1320,7 +1428,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, * Only insert this exception route if its mtu * is less than ort's mtu value. */ - if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { + if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) { err = -EINVAL; goto out; } @@ -1337,8 +1445,6 @@ static int rt6_insert_exception(struct rt6_info *nrt, } rt6_ex->rt6i = nrt; rt6_ex->stamp = jiffies; - atomic_inc(&nrt->rt6i_ref); - nrt->rt6i_node = ort->rt6i_node; hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; @@ -1352,7 +1458,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, /* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { spin_lock_bh(&ort->rt6i_table->tb6_lock); - fib6_update_sernum(ort); + fib6_update_sernum(net, ort); spin_unlock_bh(&ort->rt6i_table->tb6_lock); fib6_force_start_gc(net); } @@ -1360,7 +1466,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, return err; } -void rt6_flush_exceptions(struct rt6_info *rt) +void rt6_flush_exceptions(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1390,7 +1496,7 @@ void rt6_flush_exceptions(struct rt6_info *rt) /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ -static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, +static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -1420,10 +1526,10 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, } /* Remove the passed in cached rt from the hash table that contains it */ -int rt6_remove_exception_rt(struct rt6_info *rt) +static int rt6_remove_exception_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; int err; @@ -1468,7 +1574,7 @@ int rt6_remove_exception_rt(struct rt6_info *rt) static void rt6_update_exception_stamp_rt(struct rt6_info *rt) { struct rt6_exception_bucket *bucket; - struct rt6_info *from = rt->from; + struct fib6_info *from = rt->from; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; @@ -1498,7 +1604,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt) rcu_read_unlock(); } -static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) +static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1540,7 +1646,7 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, } static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, - struct rt6_info *rt, int mtu) + struct fib6_info *rt, int mtu) { struct rt6_exception_bucket *bucket; struct rt6_exception *rt6_ex; @@ -1557,12 +1663,12 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, struct rt6_info *entry = rt6_ex->rt6i; /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected - * route), the metrics of its rt->dst.from have already + * route), the metrics of its rt->from have already * been updated. */ - if (entry->rt6i_pmtu && + if (dst_metric_raw(&entry->dst, RTAX_MTU) && rt6_mtu_change_route_allowed(idev, entry, mtu)) - entry->rt6i_pmtu = mtu; + dst_metric_set(&entry->dst, RTAX_MTU, mtu); } bucket++; } @@ -1570,7 +1676,7 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) -static void rt6_exceptions_clean_tohost(struct rt6_info *rt, +static void rt6_exceptions_clean_tohost(struct fib6_info *rt, struct in6_addr *gateway) { struct rt6_exception_bucket *bucket; @@ -1649,7 +1755,7 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, gc_args->more++; } -void rt6_age_exceptions(struct rt6_info *rt, +void rt6_age_exceptions(struct fib6_info *rt, struct fib6_gc_args *gc_args, unsigned long now) { @@ -1685,7 +1791,8 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, const struct sk_buff *skb, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *rt_cache; + struct fib6_info *f6i; + struct rt6_info *rt; int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -1702,10 +1809,10 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - rt = rt6_select(net, fn, oif, strict); - if (rt->rt6i_nsiblings) - rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict); - if (rt == net->ipv6.ip6_null_entry) { + f6i = rt6_select(net, fn, oif, strict); + if (f6i->rt6i_nsiblings) + f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict); + if (f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@ -1717,26 +1824,25 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, } } - /*Search through exception table */ - rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); - if (rt_cache) - rt = rt_cache; - - if (rt == net->ipv6.ip6_null_entry) { + if (f6i == net->ipv6.fib6_null_entry) { + rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); trace_fib6_table_lookup(net, rt, table, fl6); return rt; - } else if (rt->rt6i_flags & RTF_CACHE) { - if (ip6_hold_safe(net, &rt, true)) { + } + + /*Search through exception table */ + rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + if (rt) { + if (ip6_hold_safe(net, &rt, true)) dst_use_noref(&rt->dst, jiffies); - rt6_dst_from_metrics_check(rt); - } + rcu_read_unlock(); trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(rt->rt6i_flags & RTF_GATEWAY))) { + !(f6i->rt6i_flags & RTF_GATEWAY))) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different @@ -1745,17 +1851,11 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *uncached_rt; - if (ip6_hold_safe(net, &rt, true)) { - dst_use_noref(&rt->dst, jiffies); - } else { - rcu_read_unlock(); - uncached_rt = rt; - goto uncached_rt_out; - } + fib6_info_hold(f6i); rcu_read_unlock(); - uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); - dst_release(&rt->dst); + uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); + fib6_info_release(f6i); if (uncached_rt) { /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() @@ -1768,7 +1868,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, dst_hold(&uncached_rt->dst); } -uncached_rt_out: trace_fib6_table_lookup(net, uncached_rt, table, fl6); return uncached_rt; @@ -1777,24 +1876,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, struct rt6_info *pcpu_rt; - dst_use_noref(&rt->dst, jiffies); local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(rt); - - if (!pcpu_rt) { - /* atomic_inc_not_zero() is needed when using rcu */ - if (atomic_inc_not_zero(&rt->rt6i_ref)) { - /* No dst_hold() on rt is needed because grabbing - * rt->rt6i_ref makes sure rt can't be released. - */ - pcpu_rt = rt6_make_pcpu_route(rt); - rt6_release(rt); - } else { - /* rt is already removed from tree */ - pcpu_rt = net->ipv6.ip6_null_entry; - dst_hold(&pcpu_rt->dst); - } - } + pcpu_rt = rt6_get_pcpu_route(f6i); + + if (!pcpu_rt) + pcpu_rt = rt6_make_pcpu_route(net, f6i); + local_bh_enable(); rcu_read_unlock(); trace_fib6_table_lookup(net, pcpu_rt, table, fl6); @@ -2015,7 +2102,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori rt->rt6i_idev = in6_dev_get(loopback_dev); rt->rt6i_gateway = ort->rt6i_gateway; rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; - rt->rt6i_metric = 0; memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); #ifdef CONFIG_IPV6_SUBTREES @@ -2031,18 +2117,26 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ -static void rt6_dst_from_metrics_check(struct rt6_info *rt) +static bool fib6_check(struct fib6_info *f6i, u32 cookie) { - if (rt->from && - dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) - dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true); + u32 rt_cookie = 0; + + if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) || + rt_cookie != cookie) + return false; + + if (fib6_check_expired(f6i)) + return false; + + return true; } static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) { u32 rt_cookie = 0; - if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) + if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) || + rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -2055,7 +2149,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) { if (!__rt6_check_expired(rt) && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && - rt6_check(rt->from, cookie)) + fib6_check(rt->from, cookie)) return &rt->dst; else return NULL; @@ -2072,8 +2166,6 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) * into this function always. */ - rt6_dst_from_metrics_check(rt); - if (rt->rt6i_flags & RTF_PCPU || (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) return rt6_dst_from_check(rt, cookie); @@ -2088,7 +2180,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { if (rt6_check_expired(rt)) { - ip6_del_rt(rt); + rt6_remove_exception_rt(rt); dst = NULL; } } else { @@ -2109,12 +2201,12 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt) { if (rt->rt6i_flags & RTF_CACHE) { if (dst_hold_safe(&rt->dst)) - ip6_del_rt(rt); - } else { + rt6_remove_exception_rt(rt); + } else if (rt->from) { struct fib6_node *fn; rcu_read_lock(); - fn = rcu_dereference(rt->rt6i_node); + fn = rcu_dereference(rt->from->rt6i_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) fn->fn_sernum = -1; rcu_read_unlock(); @@ -2126,16 +2218,15 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) { struct net *net = dev_net(rt->dst.dev); + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt->rt6i_flags |= RTF_MODIFIED; - rt->rt6i_pmtu = mtu; rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); } static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || - rcu_access_pointer(rt->rt6i_node)); + (rt->rt6i_flags & RTF_PCPU || rt->from); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -2173,10 +2264,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, } else if (daddr) { struct rt6_info *nrt6; - nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); + nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - if (rt6_insert_exception(nrt6, rt6)) + if (rt6_insert_exception(nrt6, rt6->from)) dst_release_immediate(&nrt6->dst); } } @@ -2259,7 +2350,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt, *rt_cache; + struct rt6_info *ret = NULL, *rt_cache; + struct fib6_info *rt; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -2276,29 +2368,29 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) continue; - if (rt6_check_expired(rt)) + if (fib6_check_expired(rt)) continue; - if (rt->dst.error) + if (rt->rt6i_flags & RTF_REJECT) break; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl6->flowi6_oif != rt->dst.dev->ifindex) + if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) continue; /* rt_cache's gateway might be different from its 'parent' * in the case of an ip redirect. * So we keep searching in the exception table if the gateway * is different. */ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { + if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); if (rt_cache && ipv6_addr_equal(&rdfl->gateway, &rt_cache->rt6i_gateway)) { - rt = rt_cache; + ret = rt_cache; break; } continue; @@ -2307,25 +2399,28 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, } if (!rt) - rt = net->ipv6.ip6_null_entry; - else if (rt->dst.error) { - rt = net->ipv6.ip6_null_entry; + rt = net->ipv6.fib6_null_entry; + else if (rt->rt6i_flags & RTF_REJECT) { + ret = net->ipv6.ip6_null_entry; goto out; } - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } out: - ip6_hold_safe(net, &rt, true); + if (ret) + dst_hold(&ret->dst); + else + ret = ip6_create_rt_rcu(rt); rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table, fl6); - return rt; + trace_fib6_table_lookup(net, ret, table, fl6); + return ret; }; static struct dst_entry *ip6_route_redirect(struct net *net, @@ -2417,12 +2512,8 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { - const struct rt6_info *rt = (const struct rt6_info *)dst; - unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - - if (mtu) - goto out; + unsigned int mtu; mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) @@ -2506,60 +2597,24 @@ static int ip6_dst_gc(struct dst_ops *ops) return entries > rt_max_size; } -static int ip6_convert_metrics(struct mx6_config *mxc, - const struct fib6_config *cfg) +static int ip6_convert_metrics(struct net *net, struct fib6_info *rt, + struct fib6_config *cfg) { - struct net *net = cfg->fc_nlinfo.nl_net; - bool ecn_ca = false; - struct nlattr *nla; - int remaining; - u32 *mp; - - if (!cfg->fc_mx) - return 0; - - mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (unlikely(!mp)) - return -ENOMEM; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - u32 val; - - if (!type) - continue; - if (unlikely(type > RTAX_MAX)) - goto err; - - if (type == RTAX_CC_ALGO) { - char tmp[TCP_CA_NAME_MAX]; + int err = 0; - nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); - if (val == TCP_CA_UNSPEC) - goto err; - } else { - val = nla_get_u32(nla); - } - if (type == RTAX_HOPLIMIT && val > 255) - val = 255; - if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) - goto err; + if (cfg->fc_mx) { + rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics), + GFP_KERNEL); + if (unlikely(!rt->fib6_metrics)) + return -ENOMEM; - mp[type - 1] = val; - __set_bit(type - 1, mxc->mx_valid); - } + refcount_set(&rt->fib6_metrics->refcnt, 1); - if (ecn_ca) { - __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); - mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; + err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, + rt->fib6_metrics->metrics); } - mxc->mx = mp; - return 0; - err: - kfree(mp); - return -EINVAL; + return err; } static struct rt6_info *ip6_nh_lookup_table(struct net *net, @@ -2745,11 +2800,12 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, return err; } -static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, +static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, + gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; @@ -2768,6 +2824,11 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + if (cfg->fc_type > RTN_MAX) { + NL_SET_ERR_MSG(extack, "Invalid route type"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@ -2826,19 +2887,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, - (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); + err = -ENOMEM; + rt = fib6_info_alloc(gfp_flags); + if (!rt) + goto out; - if (!rt) { - err = -ENOMEM; + if (cfg->fc_flags & RTF_ADDRCONF) + rt->dst_nocount = true; + + err = ip6_convert_metrics(net, rt, cfg); + if (err < 0) goto out; - } if (cfg->fc_flags & RTF_EXPIRES) - rt6_set_expires(rt, jiffies + + fib6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); else - rt6_clean_expires(rt); + fib6_clean_expires(rt); if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -2846,15 +2911,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, addr_type = ipv6_addr_type(&cfg->fc_dst); - if (addr_type & IPV6_ADDR_MULTICAST) - rt->dst.input = ip6_mc_input; - else if (cfg->fc_flags & RTF_LOCAL) - rt->dst.input = ip6_input; - else - rt->dst.input = ip6_forward; - - rt->dst.output = ip6_output; - if (cfg->fc_encap) { struct lwtunnel_state *lwtstate; @@ -2863,14 +2919,13 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, &lwtstate, extack); if (err) goto out; - rt->dst.lwtstate = lwtstate_get(lwtstate); - lwtunnel_set_redirect(&rt->dst); + rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate); } ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; + rt->dst_host = true; #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -2878,7 +2933,9 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, #endif rt->rt6i_metric = cfg->fc_metric; - rt->rt6i_nh_weight = 1; + rt->fib6_nh.nh_weight = 1; + + rt->fib6_type = cfg->fc_type; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes @@ -2902,27 +2959,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, } } rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; - switch (cfg->fc_type) { - case RTN_BLACKHOLE: - rt->dst.error = -EINVAL; - rt->dst.output = dst_discard_out; - rt->dst.input = dst_discard; - break; - case RTN_PROHIBIT: - rt->dst.error = -EACCES; - rt->dst.output = ip6_pkt_prohibit_out; - rt->dst.input = ip6_pkt_prohibit; - break; - case RTN_THROW: - case RTN_UNREACHABLE: - default: - rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN - : (cfg->fc_type == RTN_UNREACHABLE) - ? -EHOSTUNREACH : -ENETUNREACH; - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; - break; - } goto install_route; } @@ -2931,7 +2967,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, if (err) goto out; - rt->rt6i_gateway = cfg->fc_gateway; + rt->fib6_nh.nh_gw = cfg->fc_gateway; } err = -ENODEV; @@ -2966,9 +3002,9 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, install_route: if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && !netif_carrier_ok(dev)) - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; - rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); - rt->dst.dev = dev; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); + rt->fib6_nh.nh_dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -2980,49 +3016,34 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, dev_put(dev); if (idev) in6_dev_put(idev); - if (rt) - dst_release_immediate(&rt->dst); + fib6_info_release(rt); return ERR_PTR(err); } -int ip6_route_add(struct fib6_config *cfg, +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { - struct mx6_config mxc = { .mx = NULL, }; - struct rt6_info *rt; + struct fib6_info *rt; int err; - rt = ip6_route_info_create(cfg, extack); - if (IS_ERR(rt)) { - err = PTR_ERR(rt); - rt = NULL; - goto out; - } + rt = ip6_route_info_create(cfg, gfp_flags, extack); + if (IS_ERR(rt)) + return PTR_ERR(rt); - err = ip6_convert_metrics(&mxc, cfg); - if (err) - goto out; - - err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); - - kfree(mxc.mx); - - return err; -out: - if (rt) - dst_release_immediate(&rt->dst); + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); + fib6_info_release(rt); return err; } -static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info) { - int err; + struct net *net = info->nl_net; struct fib6_table *table; - struct net *net = dev_net(rt->dst.dev); + int err; - if (rt == net->ipv6.ip6_null_entry) { + if (rt == net->ipv6.fib6_null_entry) { err = -ENOENT; goto out; } @@ -3033,19 +3054,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) spin_unlock_bh(&table->tb6_lock); out: - ip6_rt_put(rt); + fib6_info_release(rt); return err; } -int ip6_del_rt(struct rt6_info *rt) +int ip6_del_rt(struct net *net, struct fib6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; + struct nl_info info = { .nl_net = net }; + return __ip6_del_rt(rt, &info); } -static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) +static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) { struct nl_info *info = &cfg->fc_nlinfo; struct net *net = info->nl_net; @@ -3053,20 +3073,20 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) struct fib6_table *table; int err = -ENOENT; - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) goto out_put; table = rt->rt6i_table; spin_lock_bh(&table->tb6_lock); if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { - struct rt6_info *sibling, *next_sibling; + struct fib6_info *sibling, *next_sibling; /* prefer to send a single notification with all hops */ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); if (skb) { u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; - if (rt6_fill_node(net, skb, rt, + if (rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, RTM_DELROUTE, info->portid, seq, 0) < 0) { kfree_skb(skb); @@ -3088,7 +3108,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) out_unlock: spin_unlock_bh(&table->tb6_lock); out_put: - ip6_rt_put(rt); + fib6_info_release(rt); if (skb) { rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, @@ -3097,11 +3117,28 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) return err; } +static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) +{ + int rc = -ESRCH; + + if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) + goto out; + + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + goto out; + if (dst_hold_safe(&rt->dst)) + rc = rt6_remove_exception_rt(rt); +out: + return rc; +} + static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt, *rt_cache; + struct rt6_info *rt_cache; struct fib6_table *table; + struct fib6_info *rt; struct fib6_node *fn; int err = -ESRCH; @@ -3121,25 +3158,29 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { if (cfg->fc_flags & RTF_CACHE) { + int rc; + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, &cfg->fc_src); - if (!rt_cache) - continue; - rt = rt_cache; + if (rt_cache) { + rc = ip6_del_cached_rt(rt_cache, cfg); + if (rc != -ESRCH) + return rc; + } + continue; } if (cfg->fc_ifindex && - (!rt->dst.dev || - rt->dst.dev->ifindex != cfg->fc_ifindex)) + (!rt->fib6_nh.nh_dev || + rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) continue; if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) continue; - if (!dst_hold_safe(&rt->dst)) - break; + fib6_info_hold(rt); rcu_read_unlock(); /* if gateway was specified only delete the one hop */ @@ -3242,7 +3283,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)), NDISC_REDIRECT, &ndopts); - nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); + nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL); if (!nrt) goto out; @@ -3250,14 +3291,13 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; - nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; /* No need to remove rt from the exception table if rt is * a cached route because rt6_insert_exception() will * takes care of it */ - if (rt6_insert_exception(nrt, rt)) { + if (rt6_insert_exception(nrt, rt->from)) { dst_release_immediate(&nrt->dst); goto out; } @@ -3272,44 +3312,8 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu neigh_release(neigh); } -/* - * Misc support functions - */ - -static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - BUG_ON(from->from); - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(&from->dst); - rt->from = from; - dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); -} - -static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) -{ - rt->dst.input = ort->dst.input; - rt->dst.output = ort->dst.output; - rt->rt6i_dst = ort->rt6i_dst; - rt->dst.error = ort->dst.error; - rt->rt6i_idev = ort->rt6i_idev; - if (rt->rt6i_idev) - in6_dev_hold(rt->rt6i_idev); - rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; - rt->rt6i_flags = ort->rt6i_flags; - rt6_set_from(rt, ort); - rt->rt6i_metric = ort->rt6i_metric; -#ifdef CONFIG_IPV6_SUBTREES - rt->rt6i_src = ort->rt6i_src; -#endif - rt->rt6i_prefsrc = ort->rt6i_prefsrc; - rt->rt6i_table = ort->rt6i_table; - rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); -} - #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct fib6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev) @@ -3317,7 +3321,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; int ifindex = dev->ifindex; struct fib6_node *fn; - struct rt6_info *rt = NULL; + struct fib6_info *rt = NULL; struct fib6_table *table; table = fib6_get_table(net, tb_id); @@ -3330,13 +3334,13 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->dst.dev->ifindex != ifindex) + if (rt->fib6_nh.nh_dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; - if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) continue; - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); break; } out: @@ -3344,7 +3348,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct fib6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, const struct in6_addr *gwaddr, struct net_device *dev, @@ -3357,6 +3361,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, @@ -3370,36 +3375,39 @@ static struct rt6_info *rt6_add_route_info(struct net *net, if (!prefixlen) cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&cfg, NULL); + ip6_route_add(&cfg, GFP_ATOMIC, NULL); return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, + struct net_device *dev) { u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; - struct rt6_info *rt; + struct fib6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), tb_id); + table = fib6_get_table(net, tb_id); if (!table) return NULL; rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->dst.dev && + if (dev == rt->fib6_nh.nh_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->rt6i_gateway, addr)) + ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) break; } if (rt) - ip6_hold_safe(NULL, &rt, false); + fib6_info_hold(rt); rcu_read_unlock(); return rt; } -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { @@ -3410,14 +3418,15 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), .fc_protocol = RTPROT_RA, + .fc_type = RTN_UNICAST, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = dev_net(dev), + .fc_nlinfo.nl_net = net, }; cfg.fc_gateway = *gwaddr; - if (!ip6_route_add(&cfg, NULL)) { + if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) { struct fib6_table *table; table = fib6_get_table(dev_net(dev), cfg.fc_table); @@ -3425,24 +3434,22 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; } - return rt6_get_dflt_router(gwaddr, dev); + return rt6_get_dflt_router(net, gwaddr, dev); } -static void __rt6_purge_dflt_routers(struct fib6_table *table) +static void __rt6_purge_dflt_routers(struct net *net, + struct fib6_table *table) { - struct rt6_info *rt; + struct fib6_info *rt; restart: rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - if (dst_hold_safe(&rt->dst)) { - rcu_read_unlock(); - ip6_del_rt(rt); - } else { - rcu_read_unlock(); - } + fib6_info_hold(rt); + rcu_read_unlock(); + ip6_del_rt(net, rt); goto restart; } } @@ -3463,7 +3470,7 @@ void rt6_purge_dflt_routers(struct net *net) head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) - __rt6_purge_dflt_routers(table); + __rt6_purge_dflt_routers(net, table); } } @@ -3484,6 +3491,7 @@ static void rtmsg_to_fib6_config(struct net *net, cfg->fc_dst_len = rtmsg->rtmsg_dst_len; cfg->fc_src_len = rtmsg->rtmsg_src_len; cfg->fc_flags = rtmsg->rtmsg_flags; + cfg->fc_type = rtmsg->rtmsg_type; cfg->fc_nlinfo.nl_net = net; @@ -3513,7 +3521,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&cfg, NULL); + err = ip6_route_add(&cfg, GFP_KERNEL, NULL); break; case SIOCDELRT: err = ip6_route_del(&cfg, NULL); @@ -3583,34 +3591,38 @@ static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff * Allocate a dst for local (unicast / anycast) address. */ -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, +struct fib6_info *addrconf_dst_alloc(struct net *net, + struct inet6_dev *idev, const struct in6_addr *addr, - bool anycast) + bool anycast, gfp_t gfp_flags) { u32 tb_id; - struct net *net = dev_net(idev->dev); struct net_device *dev = idev->dev; - struct rt6_info *rt; + struct fib6_info *rt; - rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); + rt = fib6_info_alloc(gfp_flags); if (!rt) return ERR_PTR(-ENOMEM); - in6_dev_hold(idev); + rt->dst_nocount = true; - rt->dst.flags |= DST_HOST; - rt->dst.input = ip6_input; - rt->dst.output = ip6_output; + in6_dev_hold(idev); rt->rt6i_idev = idev; + rt->dst_host = true; rt->rt6i_protocol = RTPROT_KERNEL; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; - if (anycast) + if (anycast) { + rt->fib6_type = RTN_ANYCAST; rt->rt6i_flags |= RTF_ANYCAST; - else + } else { + rt->fib6_type = RTN_LOCAL; rt->rt6i_flags |= RTF_LOCAL; + } - rt->rt6i_gateway = *addr; + rt->fib6_nh.nh_gw = *addr; + dev_hold(dev); + rt->fib6_nh.nh_dev = dev; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; @@ -3626,14 +3638,14 @@ struct arg_dev_net_ip { struct in6_addr *addr; }; -static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) +static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) { struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->dst.dev == dev || !dev) && - rt != net->ipv6.ip6_null_entry && + if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ @@ -3659,12 +3671,12 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) /* Remove routers and update dst entries when gateway turn into host. */ -static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +static int fib6_clean_tohost(struct fib6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { return -1; } @@ -3690,9 +3702,9 @@ struct arg_netdev_event { }; }; -static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) +static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; struct fib6_node *fn; fn = rcu_dereference_protected(rt->rt6i_node, @@ -3710,47 +3722,47 @@ static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) return NULL; } -static bool rt6_is_dead(const struct rt6_info *rt) +static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD || - (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) return true; return false; } -static int rt6_multipath_total_weight(const struct rt6_info *rt) +static int rt6_multipath_total_weight(const struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; int total = 0; if (!rt6_is_dead(rt)) - total += rt->rt6i_nh_weight; + total += rt->fib6_nh.nh_weight; list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { if (!rt6_is_dead(iter)) - total += iter->rt6i_nh_weight; + total += iter->fib6_nh.nh_weight; } return total; } -static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) +static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) { int upper_bound = -1; if (!rt6_is_dead(rt)) { - *weight += rt->rt6i_nh_weight; + *weight += rt->fib6_nh.nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); } -static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) +static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) { - struct rt6_info *iter; + struct fib6_info *iter; int weight = 0; rt6_upper_bound_set(rt, &weight, total); @@ -3759,9 +3771,9 @@ static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) rt6_upper_bound_set(iter, &weight, total); } -void rt6_multipath_rebalance(struct rt6_info *rt) +void rt6_multipath_rebalance(struct fib6_info *rt) { - struct rt6_info *first; + struct fib6_info *first; int total; /* In case the entire multipath route was marked for flushing, @@ -3783,14 +3795,14 @@ void rt6_multipath_rebalance(struct rt6_info *rt) rt6_multipath_upper_bound_set(first, total); } -static int fib6_ifup(struct rt6_info *rt, void *p_arg) +static int fib6_ifup(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; - const struct net *net = dev_net(arg->dev); + struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { - rt->rt6i_nh_flags &= ~arg->nh_flags; - fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); + if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { + rt->fib6_nh.nh_flags &= ~arg->nh_flags; + fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@ -3812,76 +3824,77 @@ void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) fib6_clean_all(dev_net(dev), fib6_ifup, &arg); } -static bool rt6_multipath_uses_dev(const struct rt6_info *rt, +static bool rt6_multipath_uses_dev(const struct fib6_info *rt, const struct net_device *dev) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) + if (rt->fib6_nh.nh_dev == dev) return true; list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) + if (iter->fib6_nh.nh_dev == dev) return true; return false; } -static void rt6_multipath_flush(struct rt6_info *rt) +static void rt6_multipath_flush(struct fib6_info *rt) { - struct rt6_info *iter; + struct fib6_info *iter; rt->should_flush = 1; list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) iter->should_flush = 1; } -static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, +static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, const struct net_device *down_dev) { - struct rt6_info *iter; + struct fib6_info *iter; unsigned int dead = 0; - if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_dev == down_dev || + rt->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == down_dev || - iter->rt6i_nh_flags & RTNH_F_DEAD) + if (iter->fib6_nh.nh_dev == down_dev || + iter->fib6_nh.nh_flags & RTNH_F_DEAD) dead++; return dead; } -static void rt6_multipath_nh_flags_set(struct rt6_info *rt, +static void rt6_multipath_nh_flags_set(struct fib6_info *rt, const struct net_device *dev, unsigned int nh_flags) { - struct rt6_info *iter; + struct fib6_info *iter; - if (rt->dst.dev == dev) - rt->rt6i_nh_flags |= nh_flags; + if (rt->fib6_nh.nh_dev == dev) + rt->fib6_nh.nh_flags |= nh_flags; list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) - if (iter->dst.dev == dev) - iter->rt6i_nh_flags |= nh_flags; + if (iter->fib6_nh.nh_dev == dev) + iter->fib6_nh.nh_flags |= nh_flags; } /* called with write lock held for table with rt */ -static int fib6_ifdown(struct rt6_info *rt, void *p_arg) +static int fib6_ifdown(struct fib6_info *rt, void *p_arg) { const struct arg_netdev_event *arg = p_arg; const struct net_device *dev = arg->dev; - const struct net *net = dev_net(dev); + struct net *net = dev_net(dev); - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; switch (arg->event) { case NETDEV_UNREGISTER: - return rt->dst.dev == dev ? -1 : 0; + return rt->fib6_nh.nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; if (!rt->rt6i_nsiblings) - return rt->dst.dev == dev ? -1 : 0; + return rt->fib6_nh.nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; @@ -3892,15 +3905,15 @@ static int fib6_ifdown(struct rt6_info *rt, void *p_arg) } rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - fib6_update_sernum(rt); + fib6_update_sernum(net, rt); rt6_multipath_rebalance(rt); } return -2; case NETDEV_CHANGE: - if (rt->dst.dev != dev || + if (rt->fib6_nh.nh_dev != dev || rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@ -3932,7 +3945,7 @@ struct rt6_mtu_change_arg { unsigned int mtu; }; -static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) +static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; @@ -3952,12 +3965,15 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->fib6_nh.nh_dev == arg->dev && + !fib6_metric_locked(rt, RTAX_MTU)) { + u32 mtu = rt->fib6_pmtu; + + if (mtu >= arg->mtu || + (mtu < arg->mtu && mtu == idev->cnf.mtu6)) + fib6_metric_set(rt, RTAX_MTU, arg->mtu); + spin_lock_bh(&rt6_exception_lock); - if (dst_metric_raw(&rt->dst, RTAX_MTU) && - rt6_mtu_change_route_allowed(idev, rt, arg->mtu)) - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); rt6_exceptions_update_pmtu(idev, rt, arg->mtu); spin_unlock_bh(&rt6_exception_lock); } @@ -4116,9 +4132,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, } struct rt6_nh { - struct rt6_info *rt6_info; + struct fib6_info *fib6_info; struct fib6_config r_cfg; - struct mx6_config mxc; struct list_head next; }; @@ -4133,23 +4148,25 @@ static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) } } -static int ip6_route_info_append(struct list_head *rt6_nh_list, - struct rt6_info *rt, struct fib6_config *r_cfg) +static int ip6_route_info_append(struct net *net, + struct list_head *rt6_nh_list, + struct fib6_info *rt, + struct fib6_config *r_cfg) { struct rt6_nh *nh; int err = -EEXIST; list_for_each_entry(nh, rt6_nh_list, next) { - /* check if rt6_info already exists */ - if (rt6_duplicate_nexthop(nh->rt6_info, rt)) + /* check if fib6_info already exists */ + if (rt6_duplicate_nexthop(nh->fib6_info, rt)) return err; } nh = kzalloc(sizeof(*nh), GFP_KERNEL); if (!nh) return -ENOMEM; - nh->rt6_info = rt; - err = ip6_convert_metrics(&nh->mxc, r_cfg); + nh->fib6_info = rt; + err = ip6_convert_metrics(net, rt, r_cfg); if (err) { kfree(nh); return err; @@ -4160,8 +4177,8 @@ static int ip6_route_info_append(struct list_head *rt6_nh_list, return 0; } -static void ip6_route_mpath_notify(struct rt6_info *rt, - struct rt6_info *rt_last, +static void ip6_route_mpath_notify(struct fib6_info *rt, + struct fib6_info *rt_last, struct nl_info *info, __u16 nlflags) { @@ -4173,7 +4190,7 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, */ if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { rt = list_first_entry(&rt_last->rt6i_siblings, - struct rt6_info, + struct fib6_info, rt6i_siblings); } @@ -4184,11 +4201,11 @@ static void ip6_route_mpath_notify(struct rt6_info *rt, static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { - struct rt6_info *rt_notif = NULL, *rt_last = NULL; + struct fib6_info *rt_notif = NULL, *rt_last = NULL; struct nl_info *info = &cfg->fc_nlinfo; struct fib6_config r_cfg; struct rtnexthop *rtnh; - struct rt6_info *rt; + struct fib6_info *rt; struct rt6_nh *err_nh; struct rt6_nh *nh, *nh_safe; __u16 nlflags; @@ -4208,7 +4225,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, rtnh = (struct rtnexthop *)cfg->fc_mp; /* Parse a Multipath Entry and build a list (rt6_nh_list) of - * rt6_info structs per nexthop + * fib6_info structs per nexthop */ while (rtnh_ok(rtnh, remaining)) { memcpy(&r_cfg, cfg, sizeof(*cfg)); @@ -4231,18 +4248,19 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, } r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); - rt = ip6_route_info_create(&r_cfg, extack); + rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto cleanup; } - rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; - err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); + err = ip6_route_info_append(info->nl_net, &rt6_nh_list, + rt, &r_cfg); if (err) { - dst_release_immediate(&rt->dst); + fib6_info_release(rt); goto cleanup; } @@ -4257,14 +4275,16 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, err_nh = NULL; list_for_each_entry(nh, &rt6_nh_list, next) { - rt_last = nh->rt6_info; - err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); + rt_last = nh->fib6_info; + err = __ip6_ins_rt(nh->fib6_info, info, extack); + fib6_info_release(nh->fib6_info); + /* save reference to first route for notification */ if (!rt_notif && !err) - rt_notif = nh->rt6_info; + rt_notif = nh->fib6_info; - /* nh->rt6_info is used or freed at this point, reset to NULL*/ - nh->rt6_info = NULL; + /* nh->fib6_info is used or freed at this point, reset to NULL*/ + nh->fib6_info = NULL; if (err) { if (replace && nhn) ip6_print_replace_route_err(&rt6_nh_list); @@ -4305,9 +4325,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, cleanup: list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { - if (nh->rt6_info) - dst_release_immediate(&nh->rt6_info->dst); - kfree(nh->mxc.mx); + if (nh->fib6_info) + fib6_info_release(nh->fib6_info); list_del(&nh->next); kfree(nh); } @@ -4384,10 +4403,10 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else - return ip6_route_add(&cfg, extack); + return ip6_route_add(&cfg, GFP_KERNEL, extack); } -static size_t rt6_nlmsg_size(struct rt6_info *rt) +static size_t rt6_nlmsg_size(struct fib6_info *rt) { int nexthop_len = 0; @@ -4395,7 +4414,7 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->dst.lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); nexthop_len *= rt->rt6i_nsiblings; } @@ -4413,38 +4432,38 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->dst.lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + nexthop_len; } -static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, +static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, unsigned int *flags, bool skip_oif) { - if (rt->rt6i_nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; - if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { + if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) *flags |= RTNH_F_DEAD; } if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0) goto nla_put_failure; } - *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); - if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) + *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); + if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && rt->dst.dev && - nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) + if (!skip_oif && rt->fib6_nh.nh_dev && + nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex)) goto nla_put_failure; - if (rt->dst.lwtstate && - lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) + if (rt->fib6_nh.nh_lwtstate && + lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0) goto nla_put_failure; return 0; @@ -4454,8 +4473,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, } /* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) +static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) { + const struct net_device *dev = rt->fib6_nh.nh_dev; struct rtnexthop *rtnh; unsigned int flags = 0; @@ -4463,8 +4483,8 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) if (!rtnh) goto nla_put_failure; - rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; - rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; + rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; + rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; if (rt6_nexthop_info(skb, rt, &flags, true) < 0) goto nla_put_failure; @@ -4480,16 +4500,16 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) return -EMSGSIZE; } -static int rt6_fill_node(struct net *net, - struct sk_buff *skb, struct rt6_info *rt, - struct in6_addr *dst, struct in6_addr *src, +static int rt6_fill_node(struct net *net, struct sk_buff *skb, + struct fib6_info *rt, struct dst_entry *dst, + struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags) { - u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; - long expires; + long expires = 0; + u32 *pmetrics; u32 table; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); @@ -4508,30 +4528,8 @@ static int rt6_fill_node(struct net *net, rtm->rtm_table = table; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; - if (rt->rt6i_flags & RTF_REJECT) { - switch (rt->dst.error) { - case -EINVAL: - rtm->rtm_type = RTN_BLACKHOLE; - break; - case -EACCES: - rtm->rtm_type = RTN_PROHIBIT; - break; - case -EAGAIN: - rtm->rtm_type = RTN_THROW; - break; - default: - rtm->rtm_type = RTN_UNREACHABLE; - break; - } - } - else if (rt->rt6i_flags & RTF_LOCAL) - rtm->rtm_type = RTN_LOCAL; - else if (rt->rt6i_flags & RTF_ANYCAST) - rtm->rtm_type = RTN_ANYCAST; - else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) - rtm->rtm_type = RTN_LOCAL; - else - rtm->rtm_type = RTN_UNICAST; + + rtm->rtm_type = rt->fib6_type; rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->rt6i_protocol; @@ -4539,8 +4537,8 @@ static int rt6_fill_node(struct net *net, if (rt->rt6i_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; - if (dst) { - if (nla_put_in6_addr(skb, RTA_DST, dst)) + if (dest) { + if (nla_put_in6_addr(skb, RTA_DST, dest)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) @@ -4568,9 +4566,9 @@ static int rt6_fill_node(struct net *net, #endif if (nla_put_u32(skb, RTA_IIF, iif)) goto nla_put_failure; - } else if (dst) { + } else if (dest) { struct in6_addr saddr_buf; - if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && + if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } @@ -4582,10 +4580,8 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt6i_pmtu) - metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; - if (rtnetlink_put_metrics(skb, metrics) < 0) + pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; + if (rtnetlink_put_metrics(skb, pmetrics) < 0) goto nla_put_failure; if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) @@ -4595,7 +4591,7 @@ static int rt6_fill_node(struct net *net, * each as a nexthop within RTA_MULTIPATH. */ if (rt->rt6i_nsiblings) { - struct rt6_info *sibling, *next_sibling; + struct fib6_info *sibling, *next_sibling; struct nlattr *mp; mp = nla_nest_start(skb, RTA_MULTIPATH); @@ -4617,9 +4613,12 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; + if (rt->rt6i_flags & RTF_EXPIRES) { + expires = dst ? dst->expires : rt->expires; + expires -= jiffies; + } - if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) + if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) goto nla_put_failure; if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) @@ -4634,12 +4633,12 @@ static int rt6_fill_node(struct net *net, return -EMSGSIZE; } -int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct fib6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; struct net *net = arg->net; - if (rt == net->ipv6.ip6_null_entry) + if (rt == net->ipv6.fib6_null_entry) return 0; if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { @@ -4653,10 +4652,9 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) } } - return rt6_fill_node(net, - arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, - NLM_F_MULTI); + return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, + RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, + arg->cb->nlh->nlmsg_seq, NLM_F_MULTI); } static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, @@ -4753,14 +4751,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } - if (fibmatch && rt->from) { - struct rt6_info *ort = rt->from; - - dst_hold(&ort->dst); - ip6_rt_put(rt); - rt = ort; - } - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) { ip6_rt_put(rt); @@ -4770,13 +4760,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, skb_dst_set(skb, &rt->dst); if (fibmatch) - err = rt6_fill_node(net, skb, rt, NULL, NULL, iif, + err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif, RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); else - err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); + err = rt6_fill_node(net, skb, rt->from, dst, + &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, + 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -4787,7 +4778,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, return err; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int nlm_flags) { struct sk_buff *skb; @@ -4802,8 +4793,8 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, if (!skb) goto errout; - err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->portid, seq, nlm_flags); + err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, + event, info->portid, seq, nlm_flags); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -4828,6 +4819,8 @@ static int ip6_route_dev_notify(struct notifier_block *this, return NOTIFY_OK; if (event == NETDEV_REGISTER) { + net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; + net->ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(dev); net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -4841,6 +4834,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, /* NETDEV_UNREGISTER could be fired for multiple times by * netdev_wait_allrefs(). Make sure we only call this once. */ + in6_dev_put_clear(&net->ipv6.fib6_null_entry->rt6i_idev); in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); @@ -5024,11 +5018,17 @@ static int __net_init ip6_route_net_init(struct net *net) if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) goto out_ip6_dst_ops; + net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template, + sizeof(*net->ipv6.fib6_null_entry), + GFP_KERNEL); + if (!net->ipv6.fib6_null_entry) + goto out_ip6_dst_entries; + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), GFP_KERNEL); if (!net->ipv6.ip6_null_entry) - goto out_ip6_dst_entries; + goto out_fib6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; dst_init_metrics(&net->ipv6.ip6_null_entry->dst, ip6_template_metrics, true); @@ -5075,6 +5075,8 @@ static int __net_init ip6_route_net_init(struct net *net) out_ip6_null_entry: kfree(net->ipv6.ip6_null_entry); #endif +out_fib6_null_entry: + kfree(net->ipv6.fib6_null_entry); out_ip6_dst_entries: dst_entries_destroy(&net->ipv6.ip6_dst_ops); out_ip6_dst_ops: @@ -5083,6 +5085,7 @@ static int __net_init ip6_route_net_init(struct net *net) static void __net_exit ip6_route_net_exit(struct net *net) { + kfree(net->ipv6.fib6_null_entry); kfree(net->ipv6.ip6_null_entry); #ifdef CONFIG_IPV6_MULTIPLE_TABLES kfree(net->ipv6.ip6_prohibit_entry); @@ -5153,6 +5156,8 @@ void __init ip6_route_init_special_entries(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ + init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; + init_net.ipv6.fib6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 416fe67271a92..2cff209d0fc1b 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -107,8 +107,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); - xdst->u.rt6.rt6i_metric = rt->rt6i_metric; - xdst->u.rt6.rt6i_node = rt->rt6i_node; xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst;