Merge branch 'udp_tunnel-convert-Intel-drivers-with-shared-tables'
Jakub Kicinski says:

====================
udp_tunnel: convert Intel drivers with shared tables

This set converts Intel drivers which have the ability to spawn
multiple netdevs, but have only one UDP tunnel port table.

Appropriate support is added to the core infra in patch 1,
followed by netdevsim support and a selftest.

The table sharing works by the core attaching the same table
structure to all devices sharing the table. This means the
reference count has to accommodate potentially large values.

Once the core is ready, i40e and ice are converted. These are
complex drivers, but we got a Tested-by from Aaron, so we
should be good :)

Compared to v1 I've made sure the selftest is executable.

Other than that, patches 8 and 9 are actually from the Mellanox
conversion series; they were kept out of that series to avoid
Mellanox vs Intel conflicts.

The last patch is new: some docs to let users know ethtool
can now display UDP tunnel info.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Sep 28, 2020
2 parents 8744c0a + 33a1aaf commit bcbf1be
Showing 15 changed files with 531 additions and 451 deletions.
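
The heart of the sharing scheme, visible in the i40e changes below, is that every netdev spawned by a PF points at the same udp_tunnel_nic_info, which in turn references a single udp_tunnel_nic_shared instance owned by the PF. A minimal sketch of that wiring, using hypothetical my_pf/my_* names and only the fields the i40e conversion in this commit uses:

#include <linux/netdevice.h>
#include <net/udp_tunnel.h>

#define MY_MAX_UDP_PORTS 16	/* hypothetical table size */

/* Driver callbacks invoked by the core; defined in the second sketch
 * after the i40e_main.c diff below.
 */
int my_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
			   unsigned int idx, struct udp_tunnel_info *ti);
int my_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
			     unsigned int idx, struct udp_tunnel_info *ti);

/* One info + one shared struct live in the PF, not in each netdev. */
struct my_pf {
	struct udp_tunnel_nic_shared udp_tunnel_shared;
	struct udp_tunnel_nic_info udp_tunnel_nic;
};

void my_udp_tunnel_init(struct my_pf *pf)
{
	pf->udp_tunnel_nic.set_port = my_udp_tunnel_set_port;
	pf->udp_tunnel_nic.unset_port = my_udp_tunnel_unset_port;
	pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
	pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
	pf->udp_tunnel_nic.tables[0].n_entries = MY_MAX_UDP_PORTS;
	pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
						    UDP_TUNNEL_TYPE_GENEVE;
}

/* Every netdev created for this PF then shares the same port table. */
void my_config_netdev(struct my_pf *pf, struct net_device *netdev)
{
	netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic;
}

Because all of the PF's netdevs reference one table, the core's per-port reference counts cover every device sharing it, which is why the cover letter notes the refcount has to accommodate potentially large values.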
28 changes: 28 additions & 0 deletions Documentation/networking/vxlan.rst
@@ -58,3 +58,31 @@ forwarding table using the new bridge command.
3. Show forwarding table::

# bridge fdb show dev vxlan0

The following NIC features may indicate support for UDP tunnel-related
offloads (most commonly VXLAN features, but support for a particular
encapsulation protocol is NIC specific):

- `tx-udp_tnl-segmentation`
- `tx-udp_tnl-csum-segmentation`
ability to perform TCP segmentation offload of UDP encapsulated frames

- `rx-udp_tunnel-port-offload`
receive side parsing of UDP encapsulated frames which allows NICs to
perform protocol-aware offloads, like checksum validation offload of
inner frames (only needed by NICs without protocol-agnostic offloads)

For devices supporting `rx-udp_tunnel-port-offload` the list of currently
offloaded ports can be interrogated with `ethtool`::

$ ethtool --show-tunnels eth0
Tunnel information for eth0:
UDP port table 0:
Size: 4
Types: vxlan
No entries
UDP port table 1:
Size: 4
Types: geneve, vxlan-gpe
Entries (1):
port 1230, vxlan-gpe
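
The feature flags above can also be checked directly with `ethtool -k` (example output abbreviated; which flags appear, and whether they can be toggled, depends on the NIC)::

    $ ethtool -k eth0 | grep udp
    tx-udp_tnl-segmentation: on
    tx-udp_tnl-csum-segmentation: on
    rx-udp_tunnel-port-offload: on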
6 changes: 3 additions & 3 deletions drivers/net/ethernet/intel/i40e/i40e.h
@@ -35,6 +35,7 @@
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_mirred.h>
#include <net/udp_tunnel.h>
#include <net/xdp_sock.h>
#include "i40e_type.h"
#include "i40e_prototype.h"
@@ -133,7 +134,6 @@ enum i40e_state_t {
__I40E_PORT_SUSPENDED,
__I40E_VF_DISABLE,
__I40E_MACVLAN_SYNC_PENDING,
__I40E_UDP_FILTER_SYNC_PENDING,
__I40E_TEMP_LINK_POLLING,
__I40E_CLIENT_SERVICE_REQUESTED,
__I40E_CLIENT_L2_CHANGE,
@@ -478,8 +478,8 @@ struct i40e_pf {
struct list_head l3_flex_pit_list;
struct list_head l4_flex_pit_list;

struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
u16 pending_udp_bitmap;
struct udp_tunnel_nic_shared udp_tunnel_shared;
struct udp_tunnel_nic_info udp_tunnel_nic;

struct hlist_head cloud_filter_list;
u16 num_cloud_filters;
264 changes: 48 additions & 216 deletions drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -10386,106 +10386,6 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
i40e_flush(hw);
}

static const char *i40e_tunnel_name(u8 type)
{
switch (type) {
case UDP_TUNNEL_TYPE_VXLAN:
return "vxlan";
case UDP_TUNNEL_TYPE_GENEVE:
return "geneve";
default:
return "unknown";
}
}

/**
* i40e_sync_udp_filters - Trigger a sync event for existing UDP filters
* @pf: board private structure
**/
static void i40e_sync_udp_filters(struct i40e_pf *pf)
{
int i;

/* loop through and set pending bit for all active UDP filters */
for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
if (pf->udp_ports[i].port)
pf->pending_udp_bitmap |= BIT_ULL(i);
}

set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
}

/**
* i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW
* @pf: board private structure
**/
static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
u8 filter_index, type;
u16 port;
int i;

if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state))
return;

/* acquire RTNL to maintain state of flags and port requests */
rtnl_lock();

for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
if (pf->pending_udp_bitmap & BIT_ULL(i)) {
struct i40e_udp_port_config *udp_port;
i40e_status ret = 0;

udp_port = &pf->udp_ports[i];
pf->pending_udp_bitmap &= ~BIT_ULL(i);

port = READ_ONCE(udp_port->port);
type = READ_ONCE(udp_port->type);
filter_index = READ_ONCE(udp_port->filter_index);

/* release RTNL while we wait on AQ command */
rtnl_unlock();

if (port)
ret = i40e_aq_add_udp_tunnel(hw, port,
type,
&filter_index,
NULL);
else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED)
ret = i40e_aq_del_udp_tunnel(hw, filter_index,
NULL);

/* reacquire RTNL so we can update filter_index */
rtnl_lock();

if (ret) {
dev_info(&pf->pdev->dev,
"%s %s port %d, index %d failed, err %s aq_err %s\n",
i40e_tunnel_name(type),
port ? "add" : "delete",
port,
filter_index,
i40e_stat_str(&pf->hw, ret),
i40e_aq_str(&pf->hw,
pf->hw.aq.asq_last_status));
if (port) {
/* failed to add, just reset port,
* drop pending bit for any deletion
*/
udp_port->port = 0;
pf->pending_udp_bitmap &= ~BIT_ULL(i);
}
} else if (port) {
/* record filter index on success */
udp_port->filter_index = filter_index;
}
}
}

rtnl_unlock();
}

/**
* i40e_service_task - Run the driver's async subtasks
* @work: pointer to work_struct containing our data
@@ -10525,7 +10425,6 @@ static void i40e_service_task(struct work_struct *work)
pf->vsi[pf->lan_vsi]);
}
i40e_sync_filters_subtask(pf);
i40e_sync_udp_filters_subtask(pf);
} else {
i40e_reset_subtask(pf);
}
@@ -12225,131 +12124,48 @@ static int i40e_set_features(struct net_device *netdev,
return 0;
}

/**
* i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port
* @pf: board private structure
* @port: The UDP port to look up
*
* Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found
**/
static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port)
{
u8 i;

for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) {
/* Do not report ports with pending deletions as
* being available.
*/
if (!port && (pf->pending_udp_bitmap & BIT_ULL(i)))
continue;
if (pf->udp_ports[i].port == port)
return i;
}

return i;
}

/**
* i40e_udp_tunnel_add - Get notifications about UDP tunnel ports that come up
* @netdev: This physical port's netdev
* @ti: Tunnel endpoint information
**/
static void i40e_udp_tunnel_add(struct net_device *netdev,
struct udp_tunnel_info *ti)
static int i40e_udp_tunnel_set_port(struct net_device *netdev,
unsigned int table, unsigned int idx,
struct udp_tunnel_info *ti)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
u16 port = ntohs(ti->port);
u8 next_idx;
u8 idx;

idx = i40e_get_udp_port_idx(pf, port);

/* Check if port already exists */
if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
netdev_info(netdev, "port %d already offloaded\n", port);
return;
}

/* Now check if there is space to add the new port */
next_idx = i40e_get_udp_port_idx(pf, 0);
struct i40e_hw *hw = &np->vsi->back->hw;
u8 type, filter_index;
i40e_status ret;

if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) {
netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n",
port);
return;
}
type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? I40E_AQC_TUNNEL_TYPE_VXLAN :
I40E_AQC_TUNNEL_TYPE_NGE;

switch (ti->type) {
case UDP_TUNNEL_TYPE_VXLAN:
pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN;
break;
case UDP_TUNNEL_TYPE_GENEVE:
if (!(pf->hw_features & I40E_HW_GENEVE_OFFLOAD_CAPABLE))
return;
pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE;
break;
default:
return;
ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index,
NULL);
if (ret) {
netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n",
i40e_stat_str(hw, ret),
i40e_aq_str(hw, hw->aq.asq_last_status));
return -EIO;
}

/* New port: add it and mark its index in the bitmap */
pf->udp_ports[next_idx].port = port;
pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED;
pf->pending_udp_bitmap |= BIT_ULL(next_idx);
set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);
udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index);
return 0;
}

/**
* i40e_udp_tunnel_del - Get notifications about UDP tunnel ports that go away
* @netdev: This physical port's netdev
* @ti: Tunnel endpoint information
**/
static void i40e_udp_tunnel_del(struct net_device *netdev,
struct udp_tunnel_info *ti)
static int i40e_udp_tunnel_unset_port(struct net_device *netdev,
unsigned int table, unsigned int idx,
struct udp_tunnel_info *ti)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
u16 port = ntohs(ti->port);
u8 idx;

idx = i40e_get_udp_port_idx(pf, port);

/* Check if port already exists */
if (idx >= I40E_MAX_PF_UDP_OFFLOAD_PORTS)
goto not_found;
struct i40e_hw *hw = &np->vsi->back->hw;
i40e_status ret;

switch (ti->type) {
case UDP_TUNNEL_TYPE_VXLAN:
if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_VXLAN)
goto not_found;
break;
case UDP_TUNNEL_TYPE_GENEVE:
if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_NGE)
goto not_found;
break;
default:
goto not_found;
ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL);
if (ret) {
netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n",
i40e_stat_str(hw, ret),
i40e_aq_str(hw, hw->aq.asq_last_status));
return -EIO;
}

/* if port exists, set it to 0 (mark for deletion)
* and make it pending
*/
pf->udp_ports[idx].port = 0;

/* Toggle pending bit instead of setting it. This way if we are
* deleting a port that has yet to be added we just clear the pending
* bit and don't have to worry about it.
*/
pf->pending_udp_bitmap ^= BIT_ULL(idx);
set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state);

return;
not_found:
netdev_warn(netdev, "UDP port %d was not found, not deleting\n",
port);
return 0;
}

static int i40e_get_phys_port_id(struct net_device *netdev,
@@ -12955,8 +12771,8 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_set_vf_link_state = i40e_ndo_set_vf_link_state,
.ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk,
.ndo_set_vf_trust = i40e_ndo_set_vf_trust,
.ndo_udp_tunnel_add = i40e_udp_tunnel_add,
.ndo_udp_tunnel_del = i40e_udp_tunnel_del,
.ndo_udp_tunnel_add = udp_tunnel_nic_add_port,
.ndo_udp_tunnel_del = udp_tunnel_nic_del_port,
.ndo_get_phys_port_id = i40e_get_phys_port_id,
.ndo_fdb_add = i40e_ndo_fdb_add,
.ndo_features_check = i40e_features_check,
@@ -13020,6 +12836,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE))
netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;

netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic;

netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;

netdev->hw_enc_features |= hw_enc_features;
@@ -14420,7 +14238,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
i40e_ptp_init(pf);

/* repopulate tunnel port filters */
i40e_sync_udp_filters(pf);
udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev);

return ret;
}
@@ -15149,6 +14967,14 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_switch_setup;

pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port;
pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port;
pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP;
pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared;
pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS;
pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN |
UDP_TUNNEL_TYPE_GENEVE;

/* The number of VSIs reported by the FW is the minimum guaranteed
* to us; HW supports far more and we share the remaining pool with
* the other PFs. We allocate space for more than the guarantee with
@@ -15158,6 +14984,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC;
else
pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
dev_warn(&pf->pdev->dev,
"limiting the VSI count due to UDP tunnel limitation %d > %d\n",
pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
}

/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
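
The new i40e callbacks above also show the per-entry private data the core offers for firmware-indexed tables: set_port() stores the returned filter index with udp_tunnel_nic_set_port_priv(), and the core hands it back as ti->hw_priv when unset_port() runs. A stripped-down sketch of that pattern, completing the my_udp_tunnel_* callbacks declared in the first sketch, with hypothetical hw_add_port()/hw_del_port() helpers standing in for the i40e admin-queue calls:

#include <linux/netdevice.h>
#include <net/udp_tunnel.h>

/* Hypothetical firmware helpers standing in for i40e_aq_add_udp_tunnel()
 * and i40e_aq_del_udp_tunnel().
 */
int hw_add_port(__be16 port, unsigned int type, u8 *filter_index);
int hw_del_port(u8 filter_index);

int my_udp_tunnel_set_port(struct net_device *netdev, unsigned int table,
			   unsigned int idx, struct udp_tunnel_info *ti)
{
	u8 filter_index;
	int err;

	err = hw_add_port(ti->port, ti->type, &filter_index);
	if (err)
		return err;

	/* Remember which hardware slot this entry landed in. */
	udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index);
	return 0;
}

int my_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table,
			     unsigned int idx, struct udp_tunnel_info *ti)
{
	/* ti->hw_priv is whatever set_port() stored for this entry. */
	return hw_del_port(ti->hw_priv);
}

With the core owning the port bookkeeping, the driver no longer needs its own port array, pending bitmap, or service-task sync, which is what the large i40e_main.c deletion above removes.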