From c30f5d012edf755959c44d71757fbf4648ad75a8 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:35 +0200 Subject: [PATCH 1/5] mlxsw: spectrum: Move netdevice NB to struct mlxsw_sp So far, all netdevice notifications that the driver cared about were related to its own ports, and mlxsw_sp could be retrieved from the netdevice's private data. For IP-in-IP offloading however, the driver cares about events on foreign netdevices, and getting at mlxsw_sp or router data structures from the handler is inconvenient. Therefore move the netdevice notifier blocks from global scope to struct mlxsw_sp to allow retrieval from the notifier block pointer itself. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 23 +++++++++++++------ .../net/ethernet/mellanox/mlxsw/spectrum.h | 1 + 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 321988ac57ccb..83f9c2564f617 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3667,6 +3667,9 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static int mlxsw_sp_netdevice_event(struct notifier_block *unused, + unsigned long event, void *ptr); + static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -3736,6 +3739,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_router_init; } + /* Initialize netdevice notifier after router is initialized, so that + * the event handler can use router structures. + */ + mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event; + err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n"); + goto err_netdev_notifier; + } + err = mlxsw_sp_span_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n"); @@ -3769,6 +3782,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); err_span_init: + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); +err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: mlxsw_sp_afa_fini(mlxsw_sp); @@ -3795,6 +3810,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_dpipe_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); + unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -4501,10 +4517,6 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, return notifier_from_errno(err); } -static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = { - .notifier_call = mlxsw_sp_netdevice_event, -}; - static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .notifier_call = mlxsw_sp_inetaddr_event, .priority = 10, /* Must be called before FIB notifier block */ @@ -4532,7 +4544,6 @@ static int __init mlxsw_sp_module_init(void) { int err; - register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); @@ -4553,7 +4564,6 @@ static int __init mlxsw_sp_module_init(void) unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; } @@ -4564,7 +4574,6 @@ static void __exit mlxsw_sp_module_exit(void) unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); - unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } module_init(mlxsw_sp_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 8e45183dc9bbb..e1a0157c0b94f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -161,6 +161,7 @@ struct mlxsw_sp { struct { DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); } kvdl; + struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; struct { From 6698c168bf48cb85505d7f6e77f0091a83aa497e Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:36 +0200 Subject: [PATCH 2/5] mlxsw: spectrum_router: Move mlxsw_sp_netdev_ipip_type() Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 6a356f4b99a35..c5e574bf3e08d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1295,6 +1295,25 @@ mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, return NULL; } +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -2785,25 +2804,6 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev, - enum mlxsw_sp_ipip_type *p_type) -{ - struct mlxsw_sp_router *router = mlxsw_sp->router; - const struct mlxsw_sp_ipip_ops *ipip_ops; - enum mlxsw_sp_ipip_type ipipt; - - for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { - ipip_ops = router->ipip_ops_arr[ipipt]; - if (dev->type == ipip_ops->dev_type) { - if (p_type) - *p_type = ipipt; - return true; - } - } - return false; -} - static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_ipip_type ipipt, struct mlxsw_sp_nexthop *nh, From 0063587d358733008423c80302cb7b077be8e237 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:37 +0200 Subject: [PATCH 3/5] mlxsw: spectrum: Support decap-only IP-in-IP tunnels Current code for offloading IP-in-IP tunneling assumes that there is no decap without encap. But that's never true for IPv6 overlays, and is not true for IPv4 ones either, if net.ipv4.conf.*.rp_filter is unset. To support decap-only tunnels, an IPIP entry is now created as soon as an offloadable tunneling device is created. When that netdevice is up'd, a decap route is looked up and possibly offloaded. Thus decap is not handled implicitly as part of mlxsw_sp_ipip_entry_get() call anymore, but needs to be done explicitly after the get, if desired. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 8 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 6 + .../ethernet/mellanox/mlxsw/spectrum_router.c | 105 ++++++++++++++++-- 3 files changed, 109 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 83f9c2564f617..c3ae650fbe5e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4497,13 +4497,17 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } -static int mlxsw_sp_netdevice_event(struct notifier_block *unused, +static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mlxsw_sp *mlxsw_sp; int err = 0; - if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) + mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); + if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev)) + err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event); + else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index e1a0157c0b94f..a4f21afd7f00e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -395,6 +395,12 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused, unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); +bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +int +mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + unsigned long event); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index c5e574bf3e08d..db834220a2fe2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1206,7 +1206,6 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, { u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); struct mlxsw_sp_router *router = mlxsw_sp->router; - struct mlxsw_sp_fib_entry *decap_fib_entry; struct mlxsw_sp_ipip_entry *ipip_entry; enum mlxsw_sp_l3proto ul_proto; union mlxsw_sp_l3addr saddr; @@ -1231,11 +1230,6 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, if (IS_ERR(ipip_entry)) return ipip_entry; - decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); - if (decap_fib_entry) - mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, - decap_fib_entry); - list_add_tail(&ipip_entry->ipip_list_node, &mlxsw_sp->router->ipip_list); @@ -1250,8 +1244,6 @@ mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, { if (--ipip_entry->ref_count == 0) { list_del(&ipip_entry->ipip_list_node); - if (ipip_entry->decap_fib_entry) - mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); mlxsw_sp_ipip_entry_destroy(ipip_entry); } } @@ -1314,6 +1306,103 @@ static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, return false; } +bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (ipip_entry->ol_dev == ol_dev) + return ipip_entry; + + return NULL; +} + +static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_ipip_type ipipt; + + mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt); + if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, + MLXSW_SP_L3_PROTO_IPV4) || + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, + MLXSW_SP_L3_PROTO_IPV6)) { + ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(ipip_entry)) + return PTR_ERR(ipip_entry); + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) + mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); +} + +static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry) { + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, + ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + } + + return 0; +} + +static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (ipip_entry && ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev, + unsigned long event) +{ + switch (event) { + case NETDEV_REGISTER: + return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev); + case NETDEV_UNREGISTER: + mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev); + return 0; + case NETDEV_UP: + return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev); + case NETDEV_DOWN: + mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev); + return 0; + } + return 0; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; From f63ce4e54a424d9f99bad2ba099c972a07eab517 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:38 +0200 Subject: [PATCH 4/5] mlxsw: spectrum: Support IPIP overlay VRF migration IPIP entries are created as soon as an offloadable device is created. That means that when such a device is later moved to a different VRF, the loopback device that backs the tunnel is wrong. Thus when an offloadable encapsulating netdevice moves from one VRF to another, make sure that the loopback is updated as necessary. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- .../net/ethernet/mellanox/mlxsw/spectrum.h | 3 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 47 ++++++++++++++++++- 3 files changed, 49 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c3ae650fbe5e9..e1e11c726c16a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4506,7 +4506,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb); if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev)) - err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event); + err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event, ptr); else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); else if (mlxsw_sp_is_vrf_event(event, ptr)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index a4f21afd7f00e..28feb745a38a5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -400,7 +400,8 @@ bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, struct net_device *l3_dev, - unsigned long event); + unsigned long event, + struct netdev_notifier_changeupper_info *info); void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index db834220a2fe2..082cf00eaadb8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1384,9 +1384,49 @@ static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); } +static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *ol_dev) +{ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_rif_ipip_lb *lb_rif; + + ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!ipip_entry) + return 0; + + /* When a tunneling device is moved to a different VRF, we need to + * update the backing loopback. Since RIFs can't be edited, we need to + * destroy and recreate it. That might create a window of opportunity + * where RALUE and RATR registers end up referencing a RIF that's + * already gone. RATRs are handled by the RIF destroy, and to take care + * of RALUE, demote the decap route back. + */ + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + + lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt, + ol_dev); + if (IS_ERR(lb_rif)) + return PTR_ERR(lb_rif); + mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); + ipip_entry->ol_lb = lb_rif; + + if (ol_dev->flags & IFF_UP) { + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, + ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + } + + return 0; +} + int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, struct net_device *ol_dev, - unsigned long event) + unsigned long event, + struct netdev_notifier_changeupper_info *info) { switch (event) { case NETDEV_REGISTER: @@ -1399,6 +1439,11 @@ int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp, case NETDEV_DOWN: mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev); return 0; + case NETDEV_CHANGEUPPER: + if (netif_is_l3_master(info->upper_dev)) + return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp, + ol_dev); + return 0; } return 0; } From 4cccb737d2fd0d78b939a97b5ac1831b9a27d4c0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Oct 2017 16:26:39 +0200 Subject: [PATCH 5/5] mlxsw: spectrum: Drop refcounting of IPIP entries Formerly, IPIP entries were created lazily by next hops that referenced an offloadable IP-in-IP netdevice. However now that they are created eagerly as a reaction to events on such netdevices, the reference counting is useless. Hence drop it. The routes whose next hops reference an offloaded IP-in-IP netdevice actually linger around a bit after their device is unregistered. However, mlxsw_sp_ipip_entry_destroy() also destroys the backing loopback, and mlxsw_sp_rif_destroy() transitively (via mlxsw_sp_nexthop_rif_gone_sync()) calls mlxsw_sp_nexthop_ipip_fini(), which unlinks the IPIP entry from a next hop. Thus no dangling pointers are left behind for the brief window after netdevice is gone, but routes not yet. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_ipip.h | 1 - .../ethernet/mellanox/mlxsw/spectrum_router.c | 49 ++++++++----------- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 1c2db831d83b1..6fb49129ce878 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -47,7 +47,6 @@ struct mlxsw_sp_ipip_entry { enum mlxsw_sp_ipip_type ipipt; struct net_device *ol_dev; /* Overlay. */ struct mlxsw_sp_rif_ipip_lb *ol_lb; - unsigned int ref_count; /* Number of next hops using the tunnel. */ struct mlxsw_sp_fib_entry *decap_fib_entry; struct list_head ipip_list_node; }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 082cf00eaadb8..3330120f2f8e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1002,9 +1002,8 @@ mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, } static void -mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry) { - WARN_ON(ipip_entry->ref_count > 0); mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); kfree(ipip_entry); } @@ -1200,9 +1199,9 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_ipip_entry * -mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, - struct net_device *ol_dev) +mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) { u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); struct mlxsw_sp_router *router = mlxsw_sp->router; @@ -1210,15 +1209,12 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, enum mlxsw_sp_l3proto ul_proto; union mlxsw_sp_l3addr saddr; + /* The configuration where several tunnels have the same local address + * in the same underlay table needs special treatment in the HW. That is + * currently not implemented in the driver. + */ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) { - if (ipip_entry->ol_dev == ol_dev) - goto inc_ref_count; - - /* The configuration where several tunnels have the same local - * address in the same underlay table needs special treatment in - * the HW. That is currently not implemented in the driver. - */ ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, @@ -1233,19 +1229,15 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, list_add_tail(&ipip_entry->ipip_list_node, &mlxsw_sp->router->ipip_list); -inc_ref_count: - ++ipip_entry->ref_count; return ipip_entry; } static void -mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) { - if (--ipip_entry->ref_count == 0) { - list_del(&ipip_entry->ipip_list_node); - mlxsw_sp_ipip_entry_destroy(ipip_entry); - } + list_del(&ipip_entry->ipip_list_node); + mlxsw_sp_ipip_entry_dealloc(ipip_entry); } static bool @@ -1338,7 +1330,8 @@ static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp, MLXSW_SP_L3_PROTO_IPV4) || router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6)) { - ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); + ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt, + ol_dev); if (IS_ERR(ipip_entry)) return PTR_ERR(ipip_entry); } @@ -1353,7 +1346,7 @@ static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp, ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); if (ipip_entry) - mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry); } static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp, @@ -2939,16 +2932,15 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_ipip_type ipipt, struct mlxsw_sp_nexthop *nh, struct net_device *ol_dev) { if (!nh->nh_grp->gateway || nh->ipip_entry) return 0; - nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); - if (IS_ERR(nh->ipip_entry)) - return PTR_ERR(nh->ipip_entry); + nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev); + if (!nh->ipip_entry) + return -ENOENT; __mlxsw_sp_nexthop_neigh_update(nh, false); return 0; @@ -2963,7 +2955,6 @@ static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, return; __mlxsw_sp_nexthop_neigh_update(nh, true); - mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); nh->ipip_entry = NULL; } @@ -3007,7 +2998,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV4)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); if (err) return err; mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common); @@ -4269,7 +4260,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, MLXSW_SP_L3_PROTO_IPV6)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev); if (err) return err; mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);