Skip to content

Commit

Permalink
net/mlx5e: Extract neigh-specific code from en_rep.c to rep/neigh.c
Browse files Browse the repository at this point in the history
As a preparation for introducing new kconfig option that controls
compilation of all TC offloads code in mlx5, extract neigh-specific code
from en_rep.c to standalone file. This allows easily compiling out the code
by only including new source in make file when corresponding kconfig is
enabled instead of adding multiple ifdef blocks to en_rep.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
  • Loading branch information
Vlad Buslov authored and Saeed Mahameed committed May 22, 2020
1 parent 768c366 commit 549c243
Show file tree
Hide file tree
Showing 8 changed files with 395 additions and 368 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/rep/tc.o en/tc_tun.o lib/port_tun.o \
mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/rep/tc.o en/rep/neigh.o en/tc_tun.o lib/port_tun.o \
lag_mp.o \
lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
en/tc_tun_geneve.o diag/en_tc_tracepoint.o
Expand Down
368 changes: 368 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,368 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies. */

#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rtnetlink.h>
#include <linux/workqueue.h>
#include <linux/rwlock.h>
#include <linux/spinlock.h>
#include <linux/notifier.h>
#include <net/netevent.h>
#include "neigh.h"
#include "tc.h"
#include "en_rep.h"
#include "fs_core.h"
#include "diag/en_rep_tracepoint.h"

static unsigned long mlx5e_rep_ipv6_interval(void)
{
if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);

return ~0UL;
}

static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
{
unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);

rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
}

void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;

mlx5_fc_queue_stats_work(priv->mdev,
&neigh_update->neigh_stats_work,
neigh_update->min_interval);
}

static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
return refcount_inc_not_zero(&nhe->refcnt);
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);

void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
{
if (refcount_dec_and_test(&nhe->refcnt)) {
mlx5e_rep_neigh_entry_remove(nhe);
kfree_rcu(nhe, rcu);
}
}

static struct mlx5e_neigh_hash_entry *
mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_neigh_hash_entry *next = NULL;

rcu_read_lock();

for (next = nhe ?
list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
&nhe->neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list) :
list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list);
next;
next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
&next->neigh_list,
struct mlx5e_neigh_hash_entry,
neigh_list))
if (mlx5e_rep_neigh_entry_hold(next))
break;

rcu_read_unlock();

if (nhe)
mlx5e_rep_neigh_entry_release(nhe);

return next;
}

static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
{
struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
neigh_update.neigh_stats_work.work);
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;

rtnl_lock();
if (!list_empty(&rpriv->neigh_update.neigh_list))
mlx5e_rep_queue_neigh_stats_work(priv);

while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
mlx5e_tc_update_neigh_used_value(nhe);

rtnl_unlock();
}

static void mlx5e_rep_neigh_update(struct work_struct *work)
{
struct mlx5e_neigh_hash_entry *nhe =
container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
struct neighbour *n = nhe->n;
struct mlx5e_encap_entry *e;
unsigned char ha[ETH_ALEN];
struct mlx5e_priv *priv;
bool neigh_connected;
u8 nud_state, dead;

rtnl_lock();

/* If these parameters are changed after we release the lock,
* we'll receive another event letting us know about it.
* We use this lock to avoid inconsistency between the neigh validity
* and it's hw address.
*/
read_lock_bh(&n->lock);
memcpy(ha, n->ha, ETH_ALEN);
nud_state = n->nud_state;
dead = n->dead;
read_unlock_bh(&n->lock);

neigh_connected = (nud_state & NUD_VALID) && !dead;

trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

list_for_each_entry(e, &nhe->encap_list, encap_list) {
if (!mlx5e_encap_take(e))
continue;

priv = netdev_priv(e->out_dev);
mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
mlx5e_encap_put(priv, e);
}
mlx5e_rep_neigh_entry_release(nhe);
rtnl_unlock();
neigh_release(n);
}

static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry *nhe,
struct neighbour *n)
{
/* Take a reference to ensure the neighbour and mlx5 encap
* entry won't be destructed until we drop the reference in
* delayed work.
*/
neigh_hold(n);

/* This assignment is valid as long as the the neigh reference
* is taken
*/
nhe->n = n;

if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
mlx5e_rep_neigh_entry_release(nhe);
neigh_release(n);
}
}

static int mlx5e_rep_netevent_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
neigh_update.netevent_nb);
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct net_device *netdev = rpriv->netdev;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_neigh_hash_entry *nhe = NULL;
struct mlx5e_neigh m_neigh = {};
struct neigh_parms *p;
struct neighbour *n;
bool found = false;

switch (event) {
case NETEVENT_NEIGH_UPDATE:
n = ptr;
#if IS_ENABLED(CONFIG_IPV6)
if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
if (n->tbl != &arp_tbl)
#endif
return NOTIFY_DONE;

m_neigh.dev = n->dev;
m_neigh.family = n->ops->family;
memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);

rcu_read_lock();
nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
rcu_read_unlock();
if (!nhe)
return NOTIFY_DONE;

mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
break;

case NETEVENT_DELAY_PROBE_TIME_UPDATE:
p = ptr;

/* We check the device is present since we don't care about
* changes in the default table, we only care about changes
* done per device delay prob time parameter.
*/
#if IS_ENABLED(CONFIG_IPV6)
if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
if (!p->dev || p->tbl != &arp_tbl)
#endif
return NOTIFY_DONE;

rcu_read_lock();
list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
neigh_list) {
if (p->dev == nhe->m_neigh.dev) {
found = true;
break;
}
}
rcu_read_unlock();
if (!found)
return NOTIFY_DONE;

neigh_update->min_interval = min_t(unsigned long,
NEIGH_VAR(p, DELAY_PROBE_TIME),
neigh_update->min_interval);
mlx5_fc_update_sampling_interval(priv->mdev,
neigh_update->min_interval);
break;
}
return NOTIFY_DONE;
}

static const struct rhashtable_params mlx5e_neigh_ht_params = {
.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
.key_len = sizeof(struct mlx5e_neigh),
.automatic_shrinking = true,
};

int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
{
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
int err;

err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
if (err)
return err;

INIT_LIST_HEAD(&neigh_update->neigh_list);
mutex_init(&neigh_update->encap_lock);
INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
mlx5e_rep_neigh_stats_work);
mlx5e_rep_neigh_update_init_interval(rpriv);

rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
if (err)
goto out_err;
return 0;

out_err:
rhashtable_destroy(&neigh_update->neigh_ht);
return err;
}

void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

unregister_netevent_notifier(&neigh_update->netevent_nb);

flush_workqueue(priv->wq); /* flush neigh update works */

cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

mutex_destroy(&neigh_update->encap_lock);
rhashtable_destroy(&neigh_update->neigh_ht);
}

static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
int err;

err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
&nhe->rhash_node,
mlx5e_neigh_ht_params);
if (err)
return err;

list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);

return err;
}

static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

mutex_lock(&rpriv->neigh_update.encap_lock);

list_del_rcu(&nhe->neigh_list);

rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
&nhe->rhash_node,
mlx5e_neigh_ht_params);
mutex_unlock(&rpriv->neigh_update.encap_lock);
}

/* This function must only be called under the representor's encap_lock or
* inside rcu read lock section.
*/
struct mlx5e_neigh_hash_entry *
mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
struct mlx5e_neigh *m_neigh)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
struct mlx5e_neigh_hash_entry *nhe;

nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
mlx5e_neigh_ht_params);
return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
}

int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e,
struct mlx5e_neigh_hash_entry **nhe)
{
int err;

*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
if (!*nhe)
return -ENOMEM;

(*nhe)->priv = priv;
memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
spin_lock_init(&(*nhe)->encap_list_lock);
INIT_LIST_HEAD(&(*nhe)->encap_list);
refcount_set(&(*nhe)->refcnt, 1);

err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
if (err)
goto out_free;
return 0;

out_free:
kfree(*nhe);
return err;
}
Loading

0 comments on commit 549c243

Please sign in to comment.