From 5d1f0f09b5f0176494419a056db3a6647becd316 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 28 Jan 2021 13:49:13 +0100 Subject: [PATCH 01/12] nexthop: Rename nexthop_free_mpath nexthop_free_mpath really should be nexthop_free_group. Rename it. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e6dfca4262424..1deb9e4df1de5 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -209,7 +209,7 @@ static void nexthop_devhash_add(struct net *net, struct nh_info *nhi) hlist_add_head(&nhi->dev_hash, head); } -static void nexthop_free_mpath(struct nexthop *nh) +static void nexthop_free_group(struct nexthop *nh) { struct nh_group *nhg; int i; @@ -249,7 +249,7 @@ void nexthop_free_rcu(struct rcu_head *head) struct nexthop *nh = container_of(head, struct nexthop, rcu); if (nh->is_group) - nexthop_free_mpath(nh); + nexthop_free_group(nh); else nexthop_free_single(nh); From 79bc55e3fee9f6169756d1a9d68c3ba9e774c3b1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:14 +0100 Subject: [PATCH 02/12] nexthop: Dispatch nexthop_select_path() by group type The logic for selecting path depends on the next-hop group type. Adapt the nexthop_select_path() to dispatch according to the group type. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1deb9e4df1de5..43bb5f4513436 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -680,16 +680,11 @@ static bool ipv4_good_nh(const struct fib_nh *nh) return !!(state & NUD_VALID); } -struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) +static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) { struct nexthop *rc = NULL; - struct nh_group *nhg; int i; - if (!nh->is_group) - return nh; - - nhg = rcu_dereference(nh->nh_grp); for (i = 0; i < nhg->num_nh; ++i) { struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; @@ -721,6 +716,21 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) return rc; } + +struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) +{ + struct nh_group *nhg; + + if (!nh->is_group) + return nh; + + nhg = rcu_dereference(nh->nh_grp); + if (nhg->mpath) + return nexthop_select_path_mp(nhg, hash); + + /* Unreachable. */ + return NULL; +} EXPORT_SYMBOL_GPL(nexthop_select_path); int nexthop_for_each_fib6_nh(struct nexthop *nh, From b9bae61be46645aa3b8b5d79d5cdfabe55ae1507 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:15 +0100 Subject: [PATCH 03/12] nexthop: Introduce to struct nh_grp_entry a per-type union The values that a next-hop group needs to keep track of depend on the group type. Introduce a union to separate fields specific to the mpath groups from fields specific to other group types. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 7 ++++++- net/ipv4/nexthop.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 226930d66b637..d0e245b0635d9 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -66,7 +66,12 @@ struct nh_info { struct nh_grp_entry { struct nexthop *nh; u8 weight; - atomic_t upper_bound; + + union { + struct { + atomic_t upper_bound; + } mpath; + }; struct list_head nh_list; struct nexthop *nh_parent; /* nexthop of group with this entry */ diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 43bb5f4513436..7a30df5aea755 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -689,7 +689,7 @@ static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash) struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_info *nhi; - if (hash > atomic_read(&nhge->upper_bound)) + if (hash > atomic_read(&nhge->mpath.upper_bound)) continue; nhi = rcu_dereference(nhge->nh->nh_info); @@ -924,7 +924,7 @@ static void nh_group_rebalance(struct nh_group *nhg) w += nhge->weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; - atomic_set(&nhge->upper_bound, upper_bound); + atomic_set(&nhge->mpath.upper_bound, upper_bound); } } From 720ccd9a728506ca4721b18a22a2157a9d48ed60 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:16 +0100 Subject: [PATCH 04/12] nexthop: Assert the invariant that a NH group is of only one type Most of the code that deals with nexthop groups relies on the fact that the group is of exactly one well-known type. Currently there is only one type, "mpath", but as more next-hop group types come, it becomes desirable to have a central place where the setting is validated. Introduce such place into nexthop_create_group(), such that the check is done before the code that relies on that invariant is invoked. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7a30df5aea755..c09b8231f56ae 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1466,10 +1466,13 @@ static struct nexthop *nexthop_create_group(struct net *net, nhg->nh_entries[i].nh_parent = nh; } - if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { + if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) nhg->mpath = 1; + + WARN_ON_ONCE(nhg->mpath != 1); + + if (nhg->mpath) nh_group_rebalance(nhg); - } if (cfg->nh_fdb) nhg->fdb_nh = 1; From 09ad6becf5355fe0645f500f518fbbd531715722 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Jan 2021 13:49:17 +0100 Subject: [PATCH 05/12] nexthop: Use enum to encode notification type Currently there are only two types of in-kernel nexthop notification. The two are distinguished by the 'is_grp' boolean field in 'struct nh_notifier_info'. As more notification types are introduced for more next-hop group types, a boolean is not an easily extensible interface. Instead, convert it to an enum. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: Petr Machata Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 54 ++++++++++++++----- drivers/net/netdevsim/fib.c | 23 ++++---- include/net/nexthop.h | 7 ++- net/ipv4/nexthop.c | 14 ++--- 4 files changed, 69 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 41424ee909a08..0ac7014703aab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4309,11 +4309,18 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, if (event != NEXTHOP_EVENT_REPLACE) return 0; - if (!info->is_grp) + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: return mlxsw_sp_nexthop_obj_single_validate(mlxsw_sp, info->nh, info->extack); - return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, info->nh_grp, - info->extack); + case NH_NOTIFIER_INFO_TYPE_GRP: + return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, + info->nh_grp, + info->extack); + default: + NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); + return -EOPNOTSUPP; + } } static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, @@ -4321,13 +4328,17 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, { const struct net_device *dev; - if (info->is_grp) + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + dev = info->nh->dev; + return info->nh->gw_family || info->nh->is_reject || + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); + case NH_NOTIFIER_INFO_TYPE_GRP: /* Already validated earlier. */ return true; - - dev = info->nh->dev; - return info->nh->gw_family || info->nh->is_reject || - mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); + default: + return false; + } } static void mlxsw_sp_nexthop_obj_blackhole_init(struct mlxsw_sp *mlxsw_sp, @@ -4410,11 +4421,22 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct nh_notifier_info *info) { - unsigned int nhs = info->is_grp ? info->nh_grp->num_nh : 1; struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_nexthop *nh; + unsigned int nhs; int err, i; + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: + nhs = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nhs = info->nh_grp->num_nh; + break; + default: + return -EINVAL; + } + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); if (!nhgi) return -ENOMEM; @@ -4427,12 +4449,18 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, int weight; nh = &nhgi->nexthops[i]; - if (info->is_grp) { - nh_obj = &info->nh_grp->nh_entries[i].nh; - weight = info->nh_grp->nh_entries[i].weight; - } else { + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: nh_obj = info->nh; weight = 1; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + nh_obj = &info->nh_grp->nh_entries[i].nh; + weight = info->nh_grp->nh_entries[i].weight; + break; + default: + err = -EINVAL; + goto err_nexthop_obj_init; } err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, weight); diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 45d8a7790bd5b..f140bbca98c5d 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -860,7 +860,7 @@ static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, nexthop = kzalloc(sizeof(*nexthop), GFP_KERNEL); if (!nexthop) - return NULL; + return ERR_PTR(-ENOMEM); nexthop->id = info->id; @@ -868,15 +868,20 @@ static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, * occupy. */ - if (!info->is_grp) { + switch (info->type) { + case NH_NOTIFIER_INFO_TYPE_SINGLE: occ = 1; - goto out; + break; + case NH_NOTIFIER_INFO_TYPE_GRP: + for (i = 0; i < info->nh_grp->num_nh; i++) + occ += info->nh_grp->nh_entries[i].weight; + break; + default: + NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); + kfree(nexthop); + return ERR_PTR(-EOPNOTSUPP); } - for (i = 0; i < info->nh_grp->num_nh; i++) - occ += info->nh_grp->nh_entries[i].weight; - -out: nexthop->occ = occ; return nexthop; } @@ -972,8 +977,8 @@ static int nsim_nexthop_insert(struct nsim_fib_data *data, int err; nexthop = nsim_nexthop_create(data, info); - if (!nexthop) - return -ENOMEM; + if (IS_ERR(nexthop)) + return PTR_ERR(nexthop); nexthop_old = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, nsim_nexthop_ht_params); diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d0e245b0635d9..7bc057aee40b1 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -114,6 +114,11 @@ enum nexthop_event_type { NEXTHOP_EVENT_REPLACE, }; +enum nh_notifier_info_type { + NH_NOTIFIER_INFO_TYPE_SINGLE, + NH_NOTIFIER_INFO_TYPE_GRP, +}; + struct nh_notifier_single_info { struct net_device *dev; u8 gw_family; @@ -142,7 +147,7 @@ struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; u32 id; - bool is_grp; + enum nh_notifier_info_type type; union { struct nh_notifier_single_info *nh; struct nh_notifier_grp_info *nh_grp; diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index c09b8231f56ae..12f812b9538da 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -71,6 +71,7 @@ __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info, static int nh_notifier_single_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { + info->type = NH_NOTIFIER_INFO_TYPE_SINGLE; info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL); if (!info->nh) return -ENOMEM; @@ -92,6 +93,7 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info, u16 num_nh = nhg->num_nh; int i; + info->type = NH_NOTIFIER_INFO_TYPE_GRP; info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh), GFP_KERNEL); if (!info->nh_grp) @@ -121,17 +123,17 @@ static int nh_notifier_info_init(struct nh_notifier_info *info, const struct nexthop *nh) { info->id = nh->id; - info->is_grp = nh->is_group; - if (info->is_grp) + if (nh->is_group) return nh_notifier_grp_info_init(info, nh); else return nh_notifier_single_info_init(info, nh); } -static void nh_notifier_info_fini(struct nh_notifier_info *info) +static void nh_notifier_info_fini(struct nh_notifier_info *info, + const struct nexthop *nh) { - if (info->is_grp) + if (nh->is_group) nh_notifier_grp_info_fini(info); else nh_notifier_single_info_fini(info); @@ -161,7 +163,7 @@ static int call_nexthop_notifiers(struct net *net, err = blocking_notifier_call_chain(&net->nexthop.notifier_chain, event_type, &info); - nh_notifier_info_fini(&info); + nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } @@ -182,7 +184,7 @@ static int call_nexthop_notifier(struct notifier_block *nb, struct net *net, return err; err = nb->notifier_call(nb, event_type, &info); - nh_notifier_info_fini(&info); + nh_notifier_info_fini(&info, nh); return notifier_to_errno(err); } From da230501f2c95a39eaa0856afd0122d35bda9e5d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:18 +0100 Subject: [PATCH 06/12] nexthop: Dispatch notifier init()/fini() by group type After there are several next-hop group types, initialization and finalization of notifier type needs to reflect the actual type. Transform nh_notifier_grp_info_init() and _fini() to make extending them easier. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 12f812b9538da..7149b12c4703c 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -86,10 +86,9 @@ static void nh_notifier_single_info_fini(struct nh_notifier_info *info) kfree(info->nh); } -static int nh_notifier_grp_info_init(struct nh_notifier_info *info, - const struct nexthop *nh) +static int nh_notifier_mp_info_init(struct nh_notifier_info *info, + struct nh_group *nhg) { - struct nh_group *nhg = rtnl_dereference(nh->nh_grp); u16 num_nh = nhg->num_nh; int i; @@ -114,9 +113,23 @@ static int nh_notifier_grp_info_init(struct nh_notifier_info *info, return 0; } -static void nh_notifier_grp_info_fini(struct nh_notifier_info *info) +static int nh_notifier_grp_info_init(struct nh_notifier_info *info, + const struct nexthop *nh) { - kfree(info->nh_grp); + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + + if (nhg->mpath) + return nh_notifier_mp_info_init(info, nhg); + return -EINVAL; +} + +static void nh_notifier_grp_info_fini(struct nh_notifier_info *info, + const struct nexthop *nh) +{ + struct nh_group *nhg = rtnl_dereference(nh->nh_grp); + + if (nhg->mpath) + kfree(info->nh_grp); } static int nh_notifier_info_init(struct nh_notifier_info *info, @@ -134,7 +147,7 @@ static void nh_notifier_info_fini(struct nh_notifier_info *info, const struct nexthop *nh) { if (nh->is_group) - nh_notifier_grp_info_fini(info); + nh_notifier_grp_info_fini(info, nh); else nh_notifier_single_info_fini(info); } From 56450ec6b7fc2824d6402fc3a60bff1a6fe32c04 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:19 +0100 Subject: [PATCH 07/12] nexthop: Extract dump filtering parameters into a single structure Requests to dump nexthops have many attributes in common with those that requests to dump buckets of resilient NH groups will have. In order to make reuse of this code simpler, convert the code to use a single structure with filtering configuration instead of passing around the parameters one by one. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7149b12c4703c..ad48e5d71bf99 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1971,16 +1971,23 @@ static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto out; } -static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, - bool group_filter, u8 family) +struct nh_dump_filter { + int dev_idx; + int master_idx; + bool group_filter; + bool fdb_filter; +}; + +static bool nh_dump_filtered(struct nexthop *nh, + struct nh_dump_filter *filter, u8 family) { const struct net_device *dev; const struct nh_info *nhi; - if (group_filter && !nh->is_group) + if (filter->group_filter && !nh->is_group) return true; - if (!dev_idx && !master_idx && !family) + if (!filter->dev_idx && !filter->master_idx && !family) return false; if (nh->is_group) @@ -1991,26 +1998,26 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx, return true; dev = nhi->fib_nhc.nhc_dev; - if (dev_idx && (!dev || dev->ifindex != dev_idx)) + if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx)) return true; - if (master_idx) { + if (filter->master_idx) { struct net_device *master; if (!dev) return true; master = netdev_master_upper_dev_get((struct net_device *)dev); - if (!master || master->ifindex != master_idx) + if (!master || master->ifindex != filter->master_idx) return true; } return false; } -static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, - int *master_idx, bool *group_filter, - bool *fdb_filter, struct netlink_callback *cb) +static int nh_valid_dump_req(const struct nlmsghdr *nlh, + struct nh_dump_filter *filter, + struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; @@ -2030,7 +2037,7 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, NL_SET_ERR_MSG(extack, "Invalid device index"); return -EINVAL; } - *dev_idx = idx; + filter->dev_idx = idx; } if (tb[NHA_MASTER]) { idx = nla_get_u32(tb[NHA_MASTER]); @@ -2038,10 +2045,10 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, NL_SET_ERR_MSG(extack, "Invalid master device index"); return -EINVAL; } - *master_idx = idx; + filter->master_idx = idx; } - *group_filter = nla_get_flag(tb[NHA_GROUPS]); - *fdb_filter = nla_get_flag(tb[NHA_FDB]); + filter->group_filter = nla_get_flag(tb[NHA_GROUPS]); + filter->fdb_filter = nla_get_flag(tb[NHA_FDB]); nhm = nlmsg_data(nlh); if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { @@ -2055,17 +2062,15 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx, /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { - bool group_filter = false, fdb_filter = false; struct nhmsg *nhm = nlmsg_data(cb->nlh); - int dev_filter_idx = 0, master_idx = 0; struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; + struct nh_dump_filter filter = {}; struct rb_node *node; int idx = 0, s_idx; int err; - err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx, - &group_filter, &fdb_filter, cb); + err = nh_valid_dump_req(cb->nlh, &filter, cb); if (err < 0) return err; @@ -2077,8 +2082,7 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) goto cont; nh = rb_entry(node, struct nexthop, rb_node); - if (nh_dump_filtered(nh, dev_filter_idx, master_idx, - group_filter, nhm->nh_family)) + if (nh_dump_filtered(nh, &filter, nhm->nh_family)) goto cont; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, From b9ebea127661e8982c03065d99422dbf0f73e4d1 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:20 +0100 Subject: [PATCH 08/12] nexthop: Extract a common helper for parsing dump attributes Requests to dump nexthops have many attributes in common with those that requests to dump buckets of resilient NH groups will have. However, they have different policies. To allow reuse of this code, extract a policy-agnostic wrapper out of nh_valid_dump_req(), and convert this function into a thin wrapper around it. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index ad48e5d71bf99..1c4f10fe3b4eb 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2015,22 +2015,13 @@ static bool nh_dump_filtered(struct nexthop *nh, return false; } -static int nh_valid_dump_req(const struct nlmsghdr *nlh, - struct nh_dump_filter *filter, - struct netlink_callback *cb) +static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb, + struct nh_dump_filter *filter, + struct netlink_ext_ack *extack) { - struct netlink_ext_ack *extack = cb->extack; - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; struct nhmsg *nhm; - int err; u32 idx; - err = nlmsg_parse(nlh, sizeof(*nhm), tb, - ARRAY_SIZE(rtm_nh_policy_dump) - 1, - rtm_nh_policy_dump, NULL); - if (err < 0) - return err; - if (tb[NHA_OIF]) { idx = nla_get_u32(tb[NHA_OIF]); if (idx > INT_MAX) { @@ -2059,6 +2050,22 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, return 0; } +static int nh_valid_dump_req(const struct nlmsghdr *nlh, + struct nh_dump_filter *filter, + struct netlink_callback *cb) +{ + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)]; + int err; + + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_dump) - 1, + rtm_nh_policy_dump, cb->extack); + if (err < 0) + return err; + + return __nh_valid_dump_req(nlh, tb, filter, cb->extack); +} + /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { From a6fbbaa64c3b0e744e7e421a13658a7441f5a9f3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:21 +0100 Subject: [PATCH 09/12] nexthop: Strongly-type context of rtm_dump_nexthop() The dump operations need to keep state from one invocation to another. A scratch area is dedicated for this purpose in the passed-in argument, cb, namely via two aliased arrays, struct netlink_callback.args and .ctx. Dumping of buckets will end up having to iterate over next hops as well, and it would be nice to be able to reuse the iteration logic with the NH dumper. The fact that the logic currently relies on fixed index to the .args array, and the indices would have to be coordinated between the two dumpers, makes this somewhat awkward. To make the access patters clearer, introduce a helper struct with a NH index, and instead of using the .args array directly, use it through this structure. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1c4f10fe3b4eb..7ae197efa5a96 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2066,9 +2066,23 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, return __nh_valid_dump_req(nlh, tb, filter, cb->extack); } +struct rtm_dump_nh_ctx { + u32 idx; +}; + +static struct rtm_dump_nh_ctx * +rtm_dump_nh_ctx(struct netlink_callback *cb) +{ + struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + return ctx; +} + /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { + struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); struct nhmsg *nhm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); struct rb_root *root = &net->nexthop.rb_root; @@ -2081,7 +2095,7 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - s_idx = cb->args[0]; + s_idx = ctx->idx; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; @@ -2108,7 +2122,7 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) out: err = skb->len; out_err: - cb->args[0] = idx; + ctx->idx = idx; cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); From cbee18071e72b68d63b055655ac96ae97126776e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:22 +0100 Subject: [PATCH 10/12] nexthop: Extract a helper for walking the next-hop tree Extract from rtm_dump_nexthop() a helper to walk the next hop tree. A separate function for this will be reusable from the bucket dumper. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 52 +++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7ae197efa5a96..e5175f531ffb0 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2079,22 +2079,17 @@ rtm_dump_nh_ctx(struct netlink_callback *cb) return ctx; } -/* rtnl */ -static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) +static int rtm_dump_walk_nexthops(struct sk_buff *skb, + struct netlink_callback *cb, + struct rb_root *root, + struct rtm_dump_nh_ctx *ctx, + struct nh_dump_filter *filter) { - struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); struct nhmsg *nhm = nlmsg_data(cb->nlh); - struct net *net = sock_net(skb->sk); - struct rb_root *root = &net->nexthop.rb_root; - struct nh_dump_filter filter = {}; struct rb_node *node; int idx = 0, s_idx; int err; - err = nh_valid_dump_req(cb->nlh, &filter, cb); - if (err < 0) - return err; - s_idx = ctx->idx; for (node = rb_first(root); node; node = rb_next(node)) { struct nexthop *nh; @@ -2103,29 +2098,48 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) goto cont; nh = rb_entry(node, struct nexthop, rb_node); - if (nh_dump_filtered(nh, &filter, nhm->nh_family)) + if (nh_dump_filtered(nh, filter, nhm->nh_family)) goto cont; + ctx->idx = idx; err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err < 0) { - if (likely(skb->len)) - goto out; - - goto out_err; - } + if (err < 0) + return err; cont: idx++; } + ctx->idx = idx; + return 0; +} + +/* rtnl */ +static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb); + struct net *net = sock_net(skb->sk); + struct rb_root *root = &net->nexthop.rb_root; + struct nh_dump_filter filter = {}; + int err; + + err = nh_valid_dump_req(cb->nlh, &filter, cb); + if (err < 0) + return err; + + err = rtm_dump_walk_nexthops(skb, cb, root, ctx, &filter); + if (err < 0) { + if (likely(skb->len)) + goto out; + goto out_err; + } + out: err = skb->len; out_err: - ctx->idx = idx; cb->seq = net->nexthop.seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); - return err; } From e948217d258f35b248578f7b400d6df23ac562da Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:23 +0100 Subject: [PATCH 11/12] nexthop: Add a callback parameter to rtm_dump_walk_nexthops() In order to allow different handling for next-hop tree dumper and for bucket dumper, parameterize the next-hop tree walker with a callback. Add rtm_dump_nexthop_cb() with just the bits relevant for next-hop tree dumping. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e5175f531ffb0..9536cf2f6aca0 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -2083,9 +2083,11 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb, struct netlink_callback *cb, struct rb_root *root, struct rtm_dump_nh_ctx *ctx, - struct nh_dump_filter *filter) + int (*nh_cb)(struct sk_buff *skb, + struct netlink_callback *cb, + struct nexthop *nh, void *data), + void *data) { - struct nhmsg *nhm = nlmsg_data(cb->nlh); struct rb_node *node; int idx = 0, s_idx; int err; @@ -2098,14 +2100,9 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb, goto cont; nh = rb_entry(node, struct nexthop, rb_node); - if (nh_dump_filtered(nh, filter, nhm->nh_family)) - goto cont; - ctx->idx = idx; - err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err < 0) + err = nh_cb(skb, cb, nh, data); + if (err) return err; cont: idx++; @@ -2115,6 +2112,20 @@ static int rtm_dump_walk_nexthops(struct sk_buff *skb, return 0; } +static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb, + struct nexthop *nh, void *data) +{ + struct nhmsg *nhm = nlmsg_data(cb->nlh); + struct nh_dump_filter *filter = data; + + if (nh_dump_filtered(nh, filter, nhm->nh_family)) + return 0; + + return nh_fill_node(skb, nh, RTM_NEWNEXTHOP, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI); +} + /* rtnl */ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) { @@ -2128,7 +2139,8 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) return err; - err = rtm_dump_walk_nexthops(skb, cb, root, ctx, &filter); + err = rtm_dump_walk_nexthops(skb, cb, root, ctx, + &rtm_dump_nexthop_cb, &filter); if (err < 0) { if (likely(skb->len)) goto out; From 0bccf8ed8aa6ecdd12cd2b3b0a73ff2c4c88d62b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Jan 2021 13:49:24 +0100 Subject: [PATCH 12/12] nexthop: Extract a helper for validation of get/del RTNL requests Validation of messages for get / del of a next hop is the same as will be validation of messages for get of a resilient next hop group bucket. The difference is that policy for resilient next hop group buckets is a superset of that used for next-hop get. It is therefore possible to reuse the code that validates the nhmsg fields, extracts the next-hop ID, and validates that. To that end, extract from nh_valid_get_del_req() a helper __nh_valid_get_del_req() that does just that. Make the nlh argument const so that the function can be called from the dump context, which only has a const nlh. Propagate the constness to nh_valid_get_del_req(). Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 9536cf2f6aca0..f1c6cbdb9e436 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1872,37 +1872,44 @@ static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh, return err; } -static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id, - struct netlink_ext_ack *extack) +static int __nh_valid_get_del_req(const struct nlmsghdr *nlh, + struct nlattr **tb, u32 *id, + struct netlink_ext_ack *extack) { struct nhmsg *nhm = nlmsg_data(nlh); - struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; - int err; - - err = nlmsg_parse(nlh, sizeof(*nhm), tb, - ARRAY_SIZE(rtm_nh_policy_get) - 1, - rtm_nh_policy_get, extack); - if (err < 0) - return err; - err = -EINVAL; if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) { NL_SET_ERR_MSG(extack, "Invalid values in header"); - goto out; + return -EINVAL; } if (!tb[NHA_ID]) { NL_SET_ERR_MSG(extack, "Nexthop id is missing"); - goto out; + return -EINVAL; } *id = nla_get_u32(tb[NHA_ID]); - if (!(*id)) + if (!(*id)) { NL_SET_ERR_MSG(extack, "Invalid nexthop id"); - else - err = 0; -out: - return err; + return -EINVAL; + } + + return 0; +} + +static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)]; + int err; + + err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb, + ARRAY_SIZE(rtm_nh_policy_get) - 1, + rtm_nh_policy_get, extack); + if (err < 0) + return err; + + return __nh_valid_get_del_req(nlh, tb, id, extack); } /* rtnl */