Skip to content

Commit

Permalink
sfc: Add SR-IOV back-end support for SFC9000 family
Browse files Browse the repository at this point in the history
On the SFC9000 family, each port has 1024 Virtual Interfaces (VIs),
each with an RX queue, a TX queue, an event queue and a mailbox
register.  These may be assigned to up to 127 SR-IOV virtual functions
per port, with up to 64 VIs per VF.

We allocate an extra channel (IRQ and event queue only) to receive
requests from VF drivers.

There is a per-port limit of 4 concurrent RX queue flushes, and queue
flushes may be initiated by the MC in response to a Function Level
Reset (FLR) of a VF.  Therefore, when SR-IOV is in use, we submit all
flush requests via the MC.

The RSS indirection table is shared with VFs, so the number of RX
queues used in the PF is limited to the number of VIs per VF.

This is almost entirely the work of Steve Hodgson, formerly
shodgson@solarflare.com.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
  • Loading branch information
Ben Hutchings committed Feb 16, 2012
1 parent 28e47c4 commit cd2d5b5
Show file tree
Hide file tree
Showing 13 changed files with 2,192 additions and 26 deletions.
8 changes: 8 additions & 0 deletions drivers/net/ethernet/sfc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,11 @@ config SFC_MCDI_MON
----help---
This exposes the on-board firmware-managed sensors as a
hardware monitor device.
config SFC_SRIOV
bool "Solarflare SFC9000-family SR-IOV support"
depends on SFC && PCI_IOV
default y
---help---
This enables support for the SFC9000 I/O Virtualization
features, allowing accelerated network performance in
virtualized environments.
1 change: 1 addition & 0 deletions drivers/net/ethernet/sfc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ sfc-y += efx.o nic.o falcon.o siena.o tx.o rx.o filter.o \
tenxpress.o txc43128_phy.o falcon_boards.o \
mcdi.o mcdi_phy.o mcdi_mon.o
sfc-$(CONFIG_SFC_MTD) += mtd.o
sfc-$(CONFIG_SFC_SRIOV) += siena_sriov.o

obj-$(CONFIG_SFC) += sfc.o
70 changes: 55 additions & 15 deletions drivers/net/ethernet/sfc/efx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1175,25 +1175,40 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
unsigned int count;
int cpu;

if (rss_cpus)
return rss_cpus;
if (rss_cpus) {
count = rss_cpus;
} else {
if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
netif_warn(efx, probe, efx->net_dev,
"RSS disabled due to allocation failure\n");
return 1;
}

if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
netif_warn(efx, probe, efx->net_dev,
"RSS disabled due to allocation failure\n");
return 1;
count = 0;
for_each_online_cpu(cpu) {
if (!cpumask_test_cpu(cpu, thread_mask)) {
++count;
cpumask_or(thread_mask, thread_mask,
topology_thread_cpumask(cpu));
}
}

free_cpumask_var(thread_mask);
}

count = 0;
for_each_online_cpu(cpu) {
if (!cpumask_test_cpu(cpu, thread_mask)) {
++count;
cpumask_or(thread_mask, thread_mask,
topology_thread_cpumask(cpu));
}
/* If RSS is requested for the PF *and* VFs then we can't write RSS
* table entries that are inaccessible to VFs
*/
if (efx_sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
count > efx_vf_size(efx)) {
netif_warn(efx, probe, efx->net_dev,
"Reducing number of RSS channels from %u to %u for "
"VF support. Increase vf-msix-limit to use more "
"channels on the PF.\n",
count, efx_vf_size(efx));
count = efx_vf_size(efx);
}

free_cpumask_var(thread_mask);
return count;
}

Expand Down Expand Up @@ -1327,6 +1342,10 @@ static int efx_probe_interrupts(struct efx_nic *efx)
}
}

/* RSS might be usable on VFs even if it is disabled on the PF */
efx->rss_spread = (efx->n_rx_channels > 1 ?
efx->n_rx_channels : efx_vf_size(efx));

return 0;
}

Expand Down Expand Up @@ -1426,7 +1445,7 @@ static int efx_probe_nic(struct efx_nic *efx)
get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
efx->rx_indir_table[i] =
ethtool_rxfh_indir_default(i, efx->n_rx_channels);
ethtool_rxfh_indir_default(i, efx->rss_spread);

efx_set_channels(efx);
netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
Expand Down Expand Up @@ -1915,6 +1934,7 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
}

memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len);
efx_sriov_mac_address_changed(efx);

/* Reconfigure the MAC */
mutex_lock(&efx->mac_lock);
Expand Down Expand Up @@ -1981,6 +2001,12 @@ static const struct net_device_ops efx_netdev_ops = {
.ndo_set_mac_address = efx_set_mac_address,
.ndo_set_rx_mode = efx_set_rx_mode,
.ndo_set_features = efx_set_features,
#ifdef CONFIG_SFC_SRIOV
.ndo_set_vf_mac = efx_sriov_set_vf_mac,
.ndo_set_vf_vlan = efx_sriov_set_vf_vlan,
.ndo_set_vf_spoofchk = efx_sriov_set_vf_spoofchk,
.ndo_get_vf_config = efx_sriov_get_vf_config,
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = efx_netpoll,
#endif
Expand Down Expand Up @@ -2150,6 +2176,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)

efx_start_interrupts(efx, false);
efx_restore_filters(efx);
efx_sriov_reset(efx);

mutex_unlock(&efx->mac_lock);

Expand Down Expand Up @@ -2440,6 +2467,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
rtnl_unlock();

efx_stop_interrupts(efx, false);
efx_sriov_fini(efx);
efx_unregister_netdev(efx);

efx_mtd_remove(efx);
Expand Down Expand Up @@ -2581,6 +2609,11 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
if (rc)
goto fail4;

rc = efx_sriov_init(efx);
if (rc)
netif_err(efx, probe, efx->net_dev,
"SR-IOV can't be enabled rc %d\n", rc);

netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");

/* Try to create MTDs, but allow this to fail */
Expand Down Expand Up @@ -2732,6 +2765,10 @@ static int __init efx_init_module(void)
if (rc)
goto err_notifier;

rc = efx_init_sriov();
if (rc)
goto err_sriov;

reset_workqueue = create_singlethread_workqueue("sfc_reset");
if (!reset_workqueue) {
rc = -ENOMEM;
Expand All @@ -2747,6 +2784,8 @@ static int __init efx_init_module(void)
err_pci:
destroy_workqueue(reset_workqueue);
err_reset:
efx_fini_sriov();
err_sriov:
unregister_netdevice_notifier(&efx_netdev_notifier);
err_notifier:
return rc;
Expand All @@ -2758,6 +2797,7 @@ static void __exit efx_exit_module(void)

pci_unregister_driver(&efx_pci_driver);
destroy_workqueue(reset_workqueue);
efx_fini_sriov();
unregister_netdevice_notifier(&efx_netdev_notifier);

}
Expand Down
3 changes: 2 additions & 1 deletion drivers/net/ethernet/sfc/ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,8 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
{
struct efx_nic *efx = netdev_priv(net_dev);

return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ?
return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 ||
efx->n_rx_channels == 1) ?
0 : ARRAY_SIZE(efx->rx_indir_table));
}

Expand Down
34 changes: 34 additions & 0 deletions drivers/net/ethernet/sfc/mcdi.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,9 @@ void efx_mcdi_process_event(struct efx_channel *channel,
case MCDI_EVENT_CODE_MAC_STATS_DMA:
/* MAC stats are gather lazily. We can ignore this. */
break;
case MCDI_EVENT_CODE_FLR:
efx_sriov_flr(efx, MCDI_EVENT_FIELD(*event, FLR_VF));
break;

default:
netif_err(efx, hw, efx->net_dev, "Unknown MCDI event 0x%x\n",
Expand Down Expand Up @@ -1154,6 +1157,37 @@ int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id)
return rc;
}

int efx_mcdi_flush_rxqs(struct efx_nic *efx)
{
struct efx_channel *channel;
struct efx_rx_queue *rx_queue;
__le32 *qid;
int rc, count;

qid = kmalloc(EFX_MAX_CHANNELS * sizeof(*qid), GFP_KERNEL);
if (qid == NULL)
return -ENOMEM;

count = 0;
efx_for_each_channel(channel, efx) {
efx_for_each_channel_rx_queue(rx_queue, channel) {
if (rx_queue->flush_pending) {
rx_queue->flush_pending = false;
atomic_dec(&efx->rxq_flush_pending);
qid[count++] = cpu_to_le32(
efx_rx_queue_index(rx_queue));
}
}
}

rc = efx_mcdi_rpc(efx, MC_CMD_FLUSH_RX_QUEUES, (u8 *)qid,
count * sizeof(*qid), NULL, 0, NULL);
WARN_ON(rc > 0);

kfree(qid);

return rc;
}

int efx_mcdi_wol_filter_reset(struct efx_nic *efx)
{
Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/sfc/mcdi.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ extern int efx_mcdi_wol_filter_set_magic(struct efx_nic *efx,
extern int efx_mcdi_wol_filter_get_magic(struct efx_nic *efx, int *id_out);
extern int efx_mcdi_wol_filter_remove(struct efx_nic *efx, int id);
extern int efx_mcdi_wol_filter_reset(struct efx_nic *efx);
extern int efx_mcdi_flush_rxqs(struct efx_nic *efx);
extern int efx_mcdi_set_mac(struct efx_nic *efx);
extern int efx_mcdi_mac_stats(struct efx_nic *efx, dma_addr_t dma_addr,
u32 dma_len, int enable, int clear);
extern int efx_mcdi_mac_reconfigure(struct efx_nic *efx);
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/sfc/mcdi_mac.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "mcdi.h"
#include "mcdi_pcol.h"

static int efx_mcdi_set_mac(struct efx_nic *efx)
int efx_mcdi_set_mac(struct efx_nic *efx)
{
u32 reject, fcntl;
u8 cmdbytes[MC_CMD_SET_MAC_IN_LEN];
Expand Down
32 changes: 31 additions & 1 deletion drivers/net/ethernet/sfc/net_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <linux/device.h>
#include <linux/highmem.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <linux/i2c.h>

Expand Down Expand Up @@ -54,7 +55,8 @@

#define EFX_MAX_CHANNELS 32U
#define EFX_MAX_RX_QUEUES EFX_MAX_CHANNELS
#define EFX_MAX_EXTRA_CHANNELS 0U
#define EFX_EXTRA_CHANNEL_IOV 0
#define EFX_MAX_EXTRA_CHANNELS 1U

/* Checksum generation is a per-queue option in hardware, so each
* queue visible to the networking core is backed by two hardware TX
Expand Down Expand Up @@ -629,6 +631,8 @@ union efx_multicast_hash {
};

struct efx_filter_state;
struct efx_vf;
struct vfdi_status;

/**
* struct efx_nic - an Efx NIC
Expand Down Expand Up @@ -712,6 +716,17 @@ struct efx_filter_state;
* completed (either success or failure). Not used when MCDI is used to
* flush receive queues.
* @flush_wq: wait queue used by efx_nic_flush_queues() to wait for flush completions.
* @vf: Array of &struct efx_vf objects.
* @vf_count: Number of VFs intended to be enabled.
* @vf_init_count: Number of VFs that have been fully initialised.
* @vi_scale: log2 number of vnics per VF.
* @vf_buftbl_base: The zeroth buffer table index used to back VF queues.
* @vfdi_status: Common VFDI status page to be dmad to VF address space.
* @local_addr_list: List of local addresses. Protected by %local_lock.
* @local_page_list: List of DMA addressable pages used to broadcast
* %local_addr_list. Protected by %local_lock.
* @local_lock: Mutex protecting %local_addr_list and %local_page_list.
* @peer_work: Work item to broadcast peer addresses to VMs.
* @monitor_work: Hardware monitor workitem
* @biu_lock: BIU (bus interface unit) lock
* @last_irq_cpu: Last CPU to handle a possible test interrupt. This
Expand Down Expand Up @@ -762,6 +777,7 @@ struct efx_nic {
unsigned next_buffer_table;
unsigned n_channels;
unsigned n_rx_channels;
unsigned rss_spread;
unsigned tx_channel_offset;
unsigned n_tx_channels;
unsigned int rx_buffer_len;
Expand Down Expand Up @@ -820,6 +836,20 @@ struct efx_nic {
atomic_t rxq_flush_outstanding;
wait_queue_head_t flush_wq;

#ifdef CONFIG_SFC_SRIOV
struct efx_channel *vfdi_channel;
struct efx_vf *vf;
unsigned vf_count;
unsigned vf_init_count;
unsigned vi_scale;
unsigned vf_buftbl_base;
struct efx_buffer vfdi_status;
struct list_head local_addr_list;
struct list_head local_page_list;
struct mutex local_lock;
struct work_struct peer_work;
#endif

/* The following fields may be written more often */

struct delayed_work monitor_work ____cacheline_aligned_in_smp;
Expand Down
Loading

0 comments on commit cd2d5b5

Please sign in to comment.