From 7cf0f96df1d86a4d08ac0160931df85bc88d42e8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:14 +0300 Subject: [PATCH 01/10] mlxsw: spectrum: Tolerate enslaving of various devices to VRF Enslaving netdevices to VRF is currently handled through an mlxsw_sp_is_vrf_event() conditional in mlxsw_sp_netdevice_event(). In the following patch sets, VRF enslavement will be handled purely in the router code. Therefore make handlers of NETDEV_PRECHANGEUPPER tolerant of enslaving to VRF, so that they do not bounce the change. For NETDEV_CHANGEUPPER, drop the WARN_ON(1) and bounce from mlxsw_sp_netdevice_port_vlan_event(). This is the only handler that warned and bounces even in the CHANGEUPPER code, other handler quietly do nothing when they encounter an unfamiliar upper. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index ac6348e2ff1f1..12fd846a778f1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4525,7 +4525,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, !netif_is_lag_master(upper_dev) && !netif_is_bridge_master(upper_dev) && !netif_is_ovs_master(upper_dev) && - !netif_is_macvlan(upper_dev)) { + !netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EINVAL; } @@ -4724,7 +4725,8 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; if (!netif_is_bridge_master(upper_dev) && - !netif_is_macvlan(upper_dev)) { + !netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EINVAL; } @@ -4763,9 +4765,6 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } else if (netif_is_macvlan(upper_dev)) { if (!info->linking) mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); - } else { - err = -EINVAL; - WARN_ON(1); } break; } @@ -4813,7 +4812,8 @@ static int mlxsw_sp_netdevice_bridge_vlan_event(struct net_device *vlan_dev, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!netif_is_macvlan(upper_dev)) { + if (!netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EOPNOTSUPP; } @@ -4874,7 +4874,9 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, switch (event) { case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; - if (!is_vlan_dev(upper_dev) && !netif_is_macvlan(upper_dev)) { + if (!is_vlan_dev(upper_dev) && + !netif_is_macvlan(upper_dev) && + !netif_is_l3_master(upper_dev)) { NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); return -EOPNOTSUPP; } @@ -4918,16 +4920,20 @@ static int mlxsw_sp_netdevice_macvlan_event(struct net_device *macvlan_dev, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(macvlan_dev); struct netdev_notifier_changeupper_info *info = ptr; struct netlink_ext_ack *extack; + struct net_device *upper_dev; if (!mlxsw_sp || event != NETDEV_PRECHANGEUPPER) return 0; extack = netdev_notifier_info_to_extack(&info->info); + upper_dev = info->upper_dev; - /* VRF enslavement is handled in mlxsw_sp_netdevice_vrf_event() */ - NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + if (!netif_is_l3_master(upper_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Unknown upper device type"); + return -EOPNOTSUPP; + } - return -EOPNOTSUPP; + return 0; } static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) From 0a27cb1692dedd44516745d86e0cb8e9524004c0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:15 +0300 Subject: [PATCH 02/10] mlxsw: spectrum_router: Add a dedicated notifier block Currently all netdevice events are handled in the centralized notifier handler maintained by spectrum.c. Since a number of events are involving router code, spectrum.c needs to dispatch them to spectrum_router.c. The spectrum module therefore needs to know more about the router code than it should have, and there is are several API points through which the two modules communicate. To simplify the notifier handlers, introduce a new notifier into the router module. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 20 +++++++++++++++++++ .../ethernet/mellanox/mlxsw/spectrum_router.h | 1 + 2 files changed, 21 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9ac4f3c003499..3fcb848836f04 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9508,6 +9508,14 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } +static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + int err = 0; + + return notifier_from_errno(err); +} + static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, struct netdev_nested_priv *priv) { @@ -10692,8 +10700,18 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_register_fib_notifier; + mlxsw_sp->router->netdevice_nb.notifier_call = + mlxsw_sp_router_netdevice_event; + err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->netdevice_nb); + if (err) + goto err_register_netdev_notifier; + return 0; +err_register_netdev_notifier: + unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->fib_nb); err_register_fib_notifier: unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), &mlxsw_sp->router->nexthop_nb); @@ -10741,6 +10759,8 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { + unregister_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), + &mlxsw_sp->router->netdevice_nb); unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp), &mlxsw_sp->router->fib_nb); unregister_nexthop_notifier(mlxsw_sp_net(mlxsw_sp), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 6e704d807a78c..37411b74c3e69 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -67,6 +67,7 @@ struct mlxsw_sp_router { struct notifier_block netevent_nb; struct notifier_block inetaddr_nb; struct notifier_block inet6addr_nb; + struct notifier_block netdevice_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_ipip_ops **ipip_ops_arr; struct mlxsw_sp_router_nve_decap nve_decap_config; From 4f8afb680f1343a2ce42d5ff3c31e8486650a3a6 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:16 +0300 Subject: [PATCH 03/10] mlxsw: spectrum: Move handling of VRF events to router code Events involving VRF, as L3 concern, are handled in the router code, by the helper mlxsw_sp_netdevice_vrf_event(). The handler is currently invoked from the centralized dispatcher in spectrum.c. Instead, move the call to the newly-introduced router-specific notifier handler. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 11 ----------- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 -- .../ethernet/mellanox/mlxsw/spectrum_router.c | 18 ++++++++++++++++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 12fd846a778f1..867c1f3810e69 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4936,15 +4936,6 @@ static int mlxsw_sp_netdevice_macvlan_event(struct net_device *macvlan_dev, return 0; } -static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) -{ - struct netdev_notifier_changeupper_info *info = ptr; - - if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER) - return false; - return netif_is_l3_master(info->upper_dev); -} - static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, unsigned long event, void *ptr) @@ -5055,8 +5046,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, event, ptr); else if (mlxsw_sp_netdevice_event_is_router(event)) err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); - else if (mlxsw_sp_is_vrf_event(event, ptr)) - err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); else if (mlxsw_sp_port_dev_check(dev)) err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr); else if (netif_is_lag_master(dev)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 2ad29ae1c6407..a20e2a1b85692 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -726,8 +726,6 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, unsigned long event, void *ptr); int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, unsigned long event, void *ptr); -int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, - struct netdev_notifier_changeupper_info *info); bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3fcb848836f04..fa4d9bf7da75b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9476,8 +9476,18 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL); } -int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, - struct netdev_notifier_changeupper_info *info) +static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + + if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER) + return false; + return netif_is_l3_master(info->upper_dev); +} + +static int +mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, + struct netdev_notifier_changeupper_info *info) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); int err = 0; @@ -9511,8 +9521,12 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); int err = 0; + if (mlxsw_sp_is_vrf_event(event, ptr)) + err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); + return notifier_from_errno(err); } From f40e600b369e00b990d788bf3c68631234ff2843 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:17 +0300 Subject: [PATCH 04/10] mlxsw: spectrum: Move handling of HW stats events to router code L3 HW stats are implemented in mlxsw as RIF counters, and therefore the code resides in spectrum_router. Exclude the offload xstats events from the mlxsw_sp_netdevice_event_is_router() predicate, and instead recreate the glue code in the router module. Previously, the order of dispatch was that for events on tunnels, a dedicated handler was called, which however did not handle HW stats events. But there is nothing special about tunnel devices as far as HW stats: there is a RIF associated with the tunnel netdevice, and that RIF is where the counter should be installed. Therefore now, HW stats events are tested first, independent of netdevice type. The upshot is that as of this commit, mlxsw supports L3 HW stats work on GRE tunnels. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 4 -- .../ethernet/mellanox/mlxsw/spectrum_router.c | 47 ++++++++++++++++--- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 867c1f3810e69..367b9b6e040af 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5010,10 +5010,6 @@ static bool mlxsw_sp_netdevice_event_is_router(unsigned long event) case NETDEV_PRE_CHANGEADDR: case NETDEV_CHANGEADDR: case NETDEV_CHANGEMTU: - case NETDEV_OFFLOAD_XSTATS_ENABLE: - case NETDEV_OFFLOAD_XSTATS_DISABLE: - case NETDEV_OFFLOAD_XSTATS_REPORT_USED: - case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: return true; default: return false; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index fa4d9bf7da75b..01f10200aeaa1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9378,6 +9378,19 @@ static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif, return -ENOBUFS; } +static bool mlxsw_sp_is_offload_xstats_event(unsigned long event) +{ + switch (event) { + case NETDEV_OFFLOAD_XSTATS_ENABLE: + case NETDEV_OFFLOAD_XSTATS_DISABLE: + case NETDEV_OFFLOAD_XSTATS_REPORT_USED: + case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: + return true; + } + + return false; +} + static int mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif, unsigned long event, @@ -9407,6 +9420,24 @@ mlxsw_sp_router_port_offload_xstats_cmd(struct mlxsw_sp_rif *rif, return 0; } +static int +mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, + struct netdev_notifier_offload_xstats_info *info) +{ + struct mlxsw_sp_rif *rif; + int err = 0; + + mutex_lock(&mlxsw_sp->router->lock); + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (rif) + err = mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info); + mutex_unlock(&mlxsw_sp->router->lock); + + return err; +} + int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, unsigned long event, void *ptr) { @@ -9432,12 +9463,6 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, case NETDEV_PRE_CHANGEADDR: err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); break; - case NETDEV_OFFLOAD_XSTATS_ENABLE: - case NETDEV_OFFLOAD_XSTATS_DISABLE: - case NETDEV_OFFLOAD_XSTATS_REPORT_USED: - case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA: - err = mlxsw_sp_router_port_offload_xstats_cmd(rif, event, ptr); - break; default: WARN_ON_ONCE(1); break; @@ -9522,9 +9547,17 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp_router *router; + struct mlxsw_sp *mlxsw_sp; int err = 0; - if (mlxsw_sp_is_vrf_event(event, ptr)) + router = container_of(nb, struct mlxsw_sp_router, netdevice_nb); + mlxsw_sp = router->mlxsw_sp; + + if (mlxsw_sp_is_offload_xstats_event(event)) + err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); return notifier_from_errno(err); From ba81954cd5266c2cbcd3e1513f0dd10986d23948 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:18 +0300 Subject: [PATCH 05/10] mlxsw: spectrum: Move handling of router events to router code The events NETDEV_PRE_CHANGEADDR, NETDEV_CHANGEADDR and NETDEV_CHANGEMTU have implications for in-ASIC router interface objects, and as such are handled in the router module. Move the handling from the central dispatcher in spectrum.c to the new notifier handler in the router module. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 14 -------------- drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 -- .../ethernet/mellanox/mlxsw/spectrum_router.c | 18 ++++++++++++++++-- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 367b9b6e040af..afc03ba938269 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5004,18 +5004,6 @@ static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, return 0; } -static bool mlxsw_sp_netdevice_event_is_router(unsigned long event) -{ - switch (event) { - case NETDEV_PRE_CHANGEADDR: - case NETDEV_CHANGEADDR: - case NETDEV_CHANGEMTU: - return true; - default: - return false; - } -} - static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -5040,8 +5028,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, event, ptr); - else if (mlxsw_sp_netdevice_event_is_router(event)) - err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); else if (mlxsw_sp_port_dev_check(dev)) err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr); else if (netif_is_lag_master(dev)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a20e2a1b85692..983195ee881e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -718,8 +718,6 @@ union mlxsw_sp_l3addr { int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, - unsigned long event, void *ptr); void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp, const struct net_device *macvlan_dev); int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 01f10200aeaa1..d5a8307aecb3c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9438,8 +9438,20 @@ mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp, return err; } -int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, - unsigned long event, void *ptr) +static bool mlxsw_sp_is_router_event(unsigned long event) +{ + switch (event) { + case NETDEV_PRE_CHANGEADDR: + case NETDEV_CHANGEADDR: + case NETDEV_CHANGEMTU: + return true; + default: + return false; + } +} + +static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, + unsigned long event, void *ptr) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct mlxsw_sp *mlxsw_sp; @@ -9557,6 +9569,8 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, if (mlxsw_sp_is_offload_xstats_event(event)) err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, event, ptr); + else if (mlxsw_sp_is_router_event(event)) + err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); From 75ef4342282a2e2d008b97d70155475a5d342927 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:19 +0300 Subject: [PATCH 06/10] mlxsw: spectrum: Move handling of tunnel events to router code The events related to IPIP tunnels are handled by the router code. Move the handling from the central dispatcher in spectrum.c to the new notifier handler in the router module. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 6 ----- .../net/ethernet/mellanox/mlxsw/spectrum.h | 13 ---------- .../ethernet/mellanox/mlxsw/spectrum_router.c | 24 ++++++++++++------- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index afc03ba938269..5ced3df92aabd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -5022,12 +5022,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, if (netif_is_vxlan(dev)) err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr); - if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) - err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, - event, ptr); - else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) - err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, - event, ptr); else if (mlxsw_sp_port_dev_check(dev)) err = mlxsw_sp_netdevice_port_event(dev, dev, event, ptr); else if (netif_is_lag_master(dev)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 983195ee881e5..a60d2bbd3aa6e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -724,19 +724,6 @@ int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused, unsigned long event, void *ptr); int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused, unsigned long event, void *ptr); -bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); -bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); -int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - unsigned long event, - struct netdev_notifier_info *info); -int -mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - unsigned long event, - struct netdev_notifier_info *info); int mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan, struct net_device *l3_dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d5a8307aecb3c..54e19e988c01c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1530,8 +1530,8 @@ static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, return false; } -bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) +static bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) { return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); } @@ -1575,8 +1575,8 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) +static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) { bool is_ipip_ul; @@ -1960,10 +1960,10 @@ static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp, } } -int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, - struct net_device *ol_dev, - unsigned long event, - struct netdev_notifier_info *info) +static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event, + struct netdev_notifier_info *info) { struct netdev_notifier_changeupper_info *chup; struct netlink_ext_ack *extack; @@ -2038,7 +2038,7 @@ __mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return 0; } -int +static int mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ul_dev, unsigned long event, @@ -9569,6 +9569,12 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, if (mlxsw_sp_is_offload_xstats_event(event)) err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, + event, ptr); + else if (mlxsw_sp_netdev_is_ipip_ul(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, dev, + event, ptr); else if (mlxsw_sp_is_router_event(event)) err = mlxsw_sp_netdevice_router_port_event(dev, event, ptr); else if (mlxsw_sp_is_vrf_event(event, ptr)) From 05a8d7d4fadfc0ab65c6e8d81f8a02484d8db116 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:20 +0300 Subject: [PATCH 07/10] mlxsw: spectrum: Update a comment The position of netdevice notifier registration no longer depends on the router initialization, because the event handler no longer dispatches to the router code. Update the comment at the registration to that effect. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5ced3df92aabd..cafd206e8d7e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3122,9 +3122,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, } } - /* Initialize netdevice notifier after router and SPAN is initialized, - * so that the event handler can use router structures and call SPAN - * respin. + /* Initialize netdevice notifier after SPAN is initialized, so that the + * event handler can call SPAN respin. */ mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; err = register_netdevice_notifier_net(mlxsw_sp_net(mlxsw_sp), From c353fb0d4c932e4b8b561d31afc54b5a60df3d95 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:21 +0300 Subject: [PATCH 08/10] mlxsw: spectrum_router: Take router lock in router notifier handler For notifications that the router needs to handle, router lock is taken. Further, at least to determine whether an event is related to a tunnel underlay, router lock also needs to be taken. Due to this, the router lock is always taken for each unhandled event, and also for some handled events, even if they are not related to underlay. Thus each event implies at least one router lock, sometimes two. Instead of deferring the locking to the leaf handlers, take the lock in the router notifier handler always. This simplifies thinking about the locking state, and in some cases saves one lock cycle. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 47 ++++++------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 54e19e988c01c..9dbb573d53eaf 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1578,13 +1578,7 @@ mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { - bool is_ipip_ul; - - mutex_lock(&mlxsw_sp->router->lock); - is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); - mutex_unlock(&mlxsw_sp->router->lock); - - return is_ipip_ul; + return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL); } static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, @@ -1969,7 +1963,6 @@ static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack; int err = 0; - mutex_lock(&mlxsw_sp->router->lock); switch (event) { case NETDEV_REGISTER: err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev); @@ -2000,7 +1993,6 @@ static int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev); break; } - mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -2045,9 +2037,8 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, struct netdev_notifier_info *info) { struct mlxsw_sp_ipip_entry *ipip_entry = NULL; - int err = 0; + int err; - mutex_lock(&mlxsw_sp->router->lock); while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, ul_dev, ipip_entry))) { @@ -2060,7 +2051,7 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, if (err) { mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp, ul_dev); - break; + return err; } if (demote_this) { @@ -2077,9 +2068,8 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, ipip_entry = prev; } } - mutex_unlock(&mlxsw_sp->router->lock); - return err; + return 0; } int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, @@ -9427,15 +9417,12 @@ mlxsw_sp_netdevice_offload_xstats_cmd(struct mlxsw_sp *mlxsw_sp, struct netdev_notifier_offload_xstats_info *info) { struct mlxsw_sp_rif *rif; - int err = 0; - mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (rif) - err = mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info); - mutex_unlock(&mlxsw_sp->router->lock); + if (!rif) + return 0; - return err; + return mlxsw_sp_router_port_offload_xstats_cmd(rif, event, info); } static bool mlxsw_sp_is_router_event(unsigned long event) @@ -9456,33 +9443,27 @@ static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev, struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif *rif; - int err = 0; mlxsw_sp = mlxsw_sp_lower_get(dev); if (!mlxsw_sp) return 0; - mutex_lock(&mlxsw_sp->router->lock); rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); if (!rif) - goto out; + return 0; switch (event) { case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: - err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack); - break; + return mlxsw_sp_router_port_change_event(mlxsw_sp, rif, extack); case NETDEV_PRE_CHANGEADDR: - err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); - break; + return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr); default: WARN_ON_ONCE(1); break; } -out: - mutex_unlock(&mlxsw_sp->router->lock); - return err; + return 0; } static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, @@ -9535,7 +9516,6 @@ mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, if (!mlxsw_sp || netif_is_macvlan(l3_dev)) return 0; - mutex_lock(&mlxsw_sp->router->lock); switch (event) { case NETDEV_PRECHANGEUPPER: break; @@ -9550,7 +9530,6 @@ mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, } break; } - mutex_unlock(&mlxsw_sp->router->lock); return err; } @@ -9566,6 +9545,8 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, router = container_of(nb, struct mlxsw_sp_router, netdevice_nb); mlxsw_sp = router->mlxsw_sp; + mutex_lock(&mlxsw_sp->router->lock); + if (mlxsw_sp_is_offload_xstats_event(event)) err = mlxsw_sp_netdevice_offload_xstats_cmd(mlxsw_sp, dev, event, ptr); @@ -9580,6 +9561,8 @@ static int mlxsw_sp_router_netdevice_event(struct notifier_block *nb, else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); + mutex_unlock(&mlxsw_sp->router->lock); + return notifier_from_errno(err); } From 32fb67a3e7a65171cd790ff0e65fcee49cae7cf5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:22 +0300 Subject: [PATCH 09/10] selftests: lib: Add a generic helper for obtaining HW stats The function get_l3_stats() from the test hw_stats_l3.sh will be useful for any test that wishes to work with L3 stats. Furthermore, it is easy to generalize to other HW stats suites (for when such are added). Therefore, move the code to lib.sh, rewrite it to have the same interface as the other stats-collecting functions, and generalize to take the name of the HW stats suite to collect as an argument. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/hw_stats_l3.sh | 16 ++++------------ tools/testing/selftests/net/forwarding/lib.sh | 11 +++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh index 1c11c4256d06e..9c1f76e108af1 100755 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh @@ -162,14 +162,6 @@ ping_ipv6() ping_test $h1.200 2001:db8:2::1 " IPv6" } -get_l3_stat() -{ - local selector=$1; shift - - ip -j stats show dev $rp1.200 group offload subgroup l3_stats | - jq '.[0].stats64.'$selector -} - send_packets_rx_ipv4() { # Send 21 packets instead of 20, because the first one might trap and go @@ -208,11 +200,11 @@ ___test_stats() local a local b - a=$(get_l3_stat ${dir}.packets) + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) send_packets_${dir}_${prot} "$@" b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ - get_l3_stat ${dir}.packets) + hw_stats_get l3_stats $rp1.200 ${dir} packets) check_err $? "Traffic not reflected in the counter: $a -> $b" } @@ -281,11 +273,11 @@ __test_stats_report() RET=0 - a=$(get_l3_stat ${dir}.packets) + a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets) send_packets_${dir}_${prot} ip address flush dev $rp1.200 b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ - get_l3_stat ${dir}.packets) + hw_stats_get l3_stats $rp1.200 ${dir} packets) check_err $? "Traffic not reflected in the counter: $a -> $b" log_test "Test ${dir} packets: stats pushed on loss of L3" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 66681a2bcdd3f..37ae49d478536 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -828,6 +828,17 @@ ipv6_stats_get() cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2 } +hw_stats_get() +{ + local suite=$1; shift + local if_name=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -j stats show dev $if_name group offload subgroup $suite | + jq ".[0].stats64.$dir.$stat" +} + humanize() { local speed=$1; shift From 813f97a2686086464f59a433c1bf3413cf504757 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Sun, 8 May 2022 11:08:23 +0300 Subject: [PATCH 10/10] selftests: forwarding: Add a tunnel-based test for L3 HW stats Add a selftest that uses an IPIP topology and tests that L3 HW stats reflect the traffic in the tunnel. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../testing/selftests/net/forwarding/Makefile | 1 + .../net/forwarding/hw_stats_l3_gre.sh | 109 ++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 0912f5ae7f6be..b5181b5a8e290 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -20,6 +20,7 @@ TEST_PROGS = bridge_igmp.sh \ gre_multipath_nh.sh \ gre_multipath.sh \ hw_stats_l3.sh \ + hw_stats_l3_gre.sh \ ip6_forward_instats_vrf.sh \ ip6gre_custom_multipath_hash.sh \ ip6gre_flat_key.sh \ diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh new file mode 100755 index 0000000000000..eb9ec4a68f84b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh @@ -0,0 +1,109 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test L3 stats on IP-in-IP GRE tunnel without key. + +# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more +# details. + +ALL_TESTS=" + ping_ipv4 + test_stats_rx + test_stats_tx +" +NUM_NETIFS=6 +source lib.sh +source ipip_lib.sh + +setup_prepare() +{ + h1=${NETIFS[p1]} + ol1=${NETIFS[p2]} + + ul1=${NETIFS[p3]} + ul2=${NETIFS[p4]} + + ol2=${NETIFS[p5]} + h2=${NETIFS[p6]} + + ol1mac=$(mac_get $ol1) + + forwarding_enable + vrf_prepare + h1_create + h2_create + sw1_flat_create gre $ol1 $ul1 + sw2_flat_create gre $ol2 $ul2 + ip stats set dev g1a l3_stats on + ip stats set dev g2a l3_stats on +} + +cleanup() +{ + pre_cleanup + + ip stats set dev g1a l3_stats off + ip stats set dev g2a l3_stats off + + sw2_flat_destroy $ol2 $ul2 + sw1_flat_destroy $ol1 $ul1 + h2_destroy + h1_destroy + + vrf_cleanup + forwarding_restore +} + +ping_ipv4() +{ + RET=0 + + ping_test $h1 192.0.2.18 " gre flat" +} + +send_packets_ipv4() +{ + # Send 21 packets instead of 20, because the first one might trap and go + # through the SW datapath, which might not bump the HW counter. + $MZ $h1 -c 21 -d 20msec -p 100 \ + -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \ + -q -t udp sp=54321,dp=12345 +} + +test_stats() +{ + local dev=$1; shift + local dir=$1; shift + + local a + local b + + RET=0 + + a=$(hw_stats_get l3_stats $dev $dir packets) + send_packets_ipv4 + b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \ + hw_stats_get l3_stats $dev $dir packets) + check_err $? "Traffic not reflected in the counter: $a -> $b" + + log_test "Test $dir packets: $prot" +} + +test_stats_tx() +{ + test_stats g1a tx +} + +test_stats_rx() +{ + test_stats g2a rx +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS