From 0b10710603b27e86ddd89fb87742997594892e50 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 1 Dec 2015 18:03:08 +0200 Subject: [PATCH 01/18] net/mlx5_core: Modify enable/disable hca functions Modify these functions to have func_id argument to state which device we are referring to. This is done as a preparation for SRIOV support where a PF driver needs to control its virtual functions. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/main.c | 45 +++++++++---------- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 4ac8d4cc49737..f2e64dc1a4436 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -462,42 +462,39 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) return err; } -static int mlx5_core_enable_hca(struct mlx5_core_dev *dev) +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(enable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)]; int err; - struct mlx5_enable_hca_mbox_in in; - struct mlx5_enable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA); + memset(in, 0, sizeof(in)); + MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); + MLX5_SET(enable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } -static int mlx5_core_disable_hca(struct mlx5_core_dev *dev) +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) { + u32 out[MLX5_ST_SZ_DW(disable_hca_out)]; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)]; int err; - struct mlx5_disable_hca_mbox_in in; - struct mlx5_disable_hca_mbox_out out; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + memset(in, 0, sizeof(in)); + MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); + MLX5_SET(disable_hca_in, in, function_id, func_id); + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + return mlx5_cmd_status_to_err_v2(out); } static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) @@ -942,7 +939,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev); + err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); goto err_pagealloc_cleanup; @@ -1106,7 +1103,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_reclaim_startup_pages(dev); err_disable_hca: - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); err_pagealloc_cleanup: mlx5_pagealloc_cleanup(dev); @@ -1149,7 +1146,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); - mlx5_core_disable_hca(dev); + mlx5_core_disable_hca(dev, 0); mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index cee5b7a839bc3..1ed2239a6a6d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -90,6 +90,8 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); void mlx5e_init(void); void mlx5e_cleanup(void); From fc50db98ff872372f266695858f87a12eb1b4f05 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 1 Dec 2015 18:03:09 +0200 Subject: [PATCH 02/18] net/mlx5_core: Add base sriov support This patch adds SRIOV base support for mlx5 supported devices. The same driver is used for both PFs and VFs; VFs are identified by the driver through the flag MLX5_PCI_DEV_IS_VF added to the pci table entries. Virtual functions are created as usual through writing a value to the sriov_numvs sysfs file of the PF device. Upon instantiating VFs, they will all be probed by the driver on the hypervisor. One can gracefully unbind them through /sys/bus/pci/drivers/mlx5_core/unbind. mlx5_wait_for_vf_pages() was added to ensure that when a VF dies without executing proper teardown, the hypervisor driver waits till all of the pages that were allocated at the hypervisor to maintain its operation are returned. In order for the VF to be operational, the PF needs to call enable_hca for it. This can be done before the VFs are created through a call to pci_enable_sriov. If the there are VFs assigned to a VMs when the driver of the PF is unloaded, all the VF will experience system error and PF driver unloads cleanly; in this case pci_disable_sriov is not called and the devices will show when running lspci. Once the PF driver is reloaded, it will sync its data structures which maintain state on its VFs. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/main.c | 36 ++- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 + .../ethernet/mellanox/mlx5/core/pagealloc.c | 38 +++ .../net/ethernet/mellanox/mlx5/core/sriov.c | 221 ++++++++++++++++++ include/linux/mlx5/driver.h | 24 ++ include/linux/mlx5/mlx5_ifc.h | 4 +- 7 files changed, 318 insertions(+), 9 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/sriov.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 26a68b8af2c5c..4d51039115279 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o + mad.o transobj.o vport.o sriov.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f2e64dc1a4436..66e2b37cfbbfe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -454,6 +454,9 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) struct mlx5_reg_host_endianess he_out; int err; + if (!mlx5_core_is_pf(dev)) + return 0; + memset(&he_in, 0, sizeof(he_in)); he_in.he = MLX5_SET_HOST_ENDIANNESS; err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), @@ -1049,6 +1052,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "sriov init failed %d\n", err); + goto err_sriov; + } + err = mlx5_register_device(dev); if (err) { dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); @@ -1065,6 +1074,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) return 0; +err_sriov: + if (mlx5_sriov_cleanup(dev)) + dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); + err_reg_dev: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); @@ -1120,6 +1133,13 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { int err = 0; + err = mlx5_sriov_cleanup(dev); + if (err) { + dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", + __func__); + return err; + } + mutex_lock(&dev->intf_state_mutex); if (dev->interface_state == MLX5_INTERFACE_STATE_DOWN) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", @@ -1192,6 +1212,7 @@ static int init_one(struct pci_dev *pdev, return -ENOMEM; } priv = &dev->priv; + priv->pci_dev_data = id->driver_data; pci_set_drvdata(pdev, dev); @@ -1362,12 +1383,12 @@ static const struct pci_error_handlers mlx5_err_handler = { }; static const struct pci_device_id mlx5_core_pci_table[] = { - { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ - { PCI_VDEVICE(MELLANOX, 0x1012) }, /* Connect-IB VF */ - { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ - { PCI_VDEVICE(MELLANOX, 0x1014) }, /* ConnectX-4 VF */ - { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ - { PCI_VDEVICE(MELLANOX, 0x1016) }, /* ConnectX-4LX VF */ + { PCI_VDEVICE(MELLANOX, 0x1011) }, /* Connect-IB */ + { PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF}, /* Connect-IB VF */ + { PCI_VDEVICE(MELLANOX, 0x1013) }, /* ConnectX-4 */ + { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ + { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ + { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { 0, } }; @@ -1378,7 +1399,8 @@ static struct pci_driver mlx5_core_driver = { .id_table = mlx5_core_pci_table, .probe = init_one, .remove = remove_one, - .err_handler = &mlx5_err_handler + .err_handler = &mlx5_err_handler, + .sriov_configure = mlx5_core_sriov_configure, }; static int __init init(void) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1ed2239a6a6d2..1649d5cf9e29a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -90,8 +90,10 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); void mlx5e_init(void); void mlx5e_cleanup(void); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 4d3377b126578..9eeee0545f1cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include "mlx5_core.h" @@ -95,6 +96,7 @@ struct mlx5_manage_pages_outbox { enum { MAX_RECLAIM_TIME_MSECS = 5000, + MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, }; enum { @@ -352,6 +354,10 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, goto out_4k; } + dev->priv.fw_pages += npages; + if (func_id) + dev->priv.vfs_pages += npages; + mlx5_core_dbg(dev, "err %d\n", err); kvfree(in); @@ -405,6 +411,12 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, } num_claimed = be32_to_cpu(out->num_entries); + if (num_claimed > npages) { + mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", + num_claimed, npages); + err = -EINVAL; + goto out_free; + } if (nclaimed) *nclaimed = num_claimed; @@ -412,6 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, addr = be64_to_cpu(out->pas[i]); free_4k(dev, addr); } + dev->priv.fw_pages -= num_claimed; + if (func_id) + dev->priv.vfs_pages -= num_claimed; out_free: kvfree(out); @@ -548,3 +563,26 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) { destroy_workqueue(dev->priv.pg_wq); } + +int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) +{ + unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + int prev_vfs_pages = dev->priv.vfs_pages; + + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, + dev->priv.name); + while (dev->priv.vfs_pages) { + if (time_after(jiffies, end)) { + mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages); + return -ETIMEDOUT; + } + if (dev->priv.vfs_pages < prev_vfs_pages) { + end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); + prev_vfs_pages = dev->priv.vfs_pages; + } + msleep(50); + } + + mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c new file mode 100644 index 0000000000000..19a43240e3592 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2014, Mellanox Technologies inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include "mlx5_core.h" + +static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); + } else { + sriov->vfs_ctx[vf - 1].enabled = 1; + mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); + } + } +} + +static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int vf; + + for (vf = 1; vf <= num_vfs; vf++) { + if (sriov->vfs_ctx[vf - 1].enabled) { + if (mlx5_core_disable_hca(dev, vf)) + mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); + else + sriov->vfs_ctx[vf - 1].enabled = 0; + } + } +} + +static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err; + + if (pci_num_vf(pdev)) + pci_disable_sriov(pdev); + + enable_vfs(dev, num_vfs); + + err = pci_enable_sriov(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "enable sriov failed %d\n", err); + goto ex; + } + + return 0; + +ex: + disable_vfs(dev, num_vfs); + return err; +} + +static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = num_vfs; + err = mlx5_core_create_vfs(pdev, num_vfs); + if (err) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + return err; + } + + return 0; +} + +static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + sriov->num_vfs = num_vfs; +} + +static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov; + + sriov = &dev->priv.sriov; + disable_vfs(dev, sriov->num_vfs); + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout claiming VFs pages\n"); + + sriov->num_vfs = 0; +} + +int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + + mlx5_core_dbg(dev, "requsted num_vfs %d\n", num_vfs); + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + mlx5_core_cleanup_vfs(dev); + + if (!num_vfs) { + kfree(sriov->vfs_ctx); + sriov->vfs_ctx = NULL; + if (!pci_vfs_assigned(pdev)) + pci_disable_sriov(pdev); + else + pr_info("unloading PF driver while leaving orphan VFs\n"); + + return 0; + } + + err = mlx5_core_sriov_enable(pdev, num_vfs); + if (err) { + dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); + return err; + } + + mlx5_core_init_vfs(dev, num_vfs); + + return num_vfs; +} + +static int sync_required(struct pci_dev *pdev) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int cur_vfs = pci_num_vf(pdev); + + if (cur_vfs != sriov->num_vfs) { + pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); + return 1; + } + + return 0; +} + +int mlx5_sriov_init(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + struct pci_dev *pdev = dev->pdev; + int cur_vfs; + + if (!mlx5_core_is_pf(dev)) + return 0; + + if (!sync_required(dev->pdev)) + return 0; + + cur_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + if (!sriov->vfs_ctx) + return -ENOMEM; + + sriov->enabled_vfs = cur_vfs; + + mlx5_core_init_vfs(dev, cur_vfs); + + enable_vfs(dev, cur_vfs); + + return 0; +} + +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + if (!mlx5_core_is_pf(dev)) + return 0; + + err = mlx5_core_sriov_configure(pdev, 0); + if (err) + return err; + + return 0; +} diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5c857f2a20d7b..efebb87163c8b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -426,6 +426,16 @@ struct mlx5_mr_table { struct radix_tree_root tree; }; +struct mlx5_vf_context { + int enabled; +}; + +struct mlx5_core_sriov { + struct mlx5_vf_context *vfs_ctx; + int num_vfs; + int enabled_vfs; +}; + struct mlx5_irq_info { cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; @@ -447,6 +457,7 @@ struct mlx5_priv { int fw_pages; atomic_t reg_pages; struct list_head free_list; + int vfs_pages; struct mlx5_core_health health; @@ -485,6 +496,8 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + struct mlx5_core_sriov sriov; + unsigned long pci_dev_data; }; enum mlx5_device_state { @@ -739,6 +752,8 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +int mlx5_sriov_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); @@ -884,6 +899,15 @@ struct mlx5_profile { } mr_cache[MAX_MR_CACHE_ENTRIES]; }; +enum { + MLX5_PCI_DEV_IS_VF = 1 << 0, +}; + +static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev) +{ + return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1565324eb620c..9b76fddd696b9 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -665,7 +665,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_17[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 reserved_18[0x4]; + u8 reserved_18_0; + u8 early_vf_enable; + u8 reserved_18[0x2]; u8 local_ca_ack_delay[0x5]; u8 reserved_19[0x6]; u8 port_type[0x2]; From 54f0a411ec72cb437d57d0c9654dcbd0f198ff3a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:10 +0200 Subject: [PATCH 03/18] net/mlx5: Add HW capabilities and structs for SR-IOV E-Switch Update HCA capabilities and HW struct to include needed capabilities for upcoming Ethernet Switch (SR-IOV E-Switch). Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 9b76fddd696b9..836cf0e43174d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -665,7 +665,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_17[0x1]; u8 ets[0x1]; u8 nic_flow_table[0x1]; - u8 reserved_18_0; + u8 eswitch_flow_table[0x1]; u8 early_vf_enable; u8 reserved_18[0x2]; u8 local_ca_ack_delay[0x5]; @@ -789,22 +789,30 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_60[0x1b]; u8 log_max_wq_sz[0x5]; - u8 reserved_61[0xa0]; - + u8 nic_vport_change_event[0x1]; + u8 reserved_61[0xa]; + u8 log_max_vlan_list[0x5]; u8 reserved_62[0x3]; + u8 log_max_current_mc_list[0x5]; + u8 reserved_63[0x3]; + u8 log_max_current_uc_list[0x5]; + + u8 reserved_64[0x80]; + + u8 reserved_65[0x3]; u8 log_max_l2_table[0x5]; - u8 reserved_63[0x8]; + u8 reserved_66[0x8]; u8 log_uar_page_sz[0x10]; - u8 reserved_64[0x100]; + u8 reserved_67[0xe0]; - u8 reserved_65[0x1f]; + u8 reserved_68[0x1f]; u8 cqe_zip[0x1]; u8 cqe_zip_timeout[0x10]; u8 cqe_zip_max_num[0x10]; - u8 reserved_66[0x220]; + u8 reserved_69[0x220]; }; enum { @@ -2135,10 +2143,6 @@ struct mlx5_ifc_rmpc_bits { struct mlx5_ifc_wq_bits wq; }; -enum { - MLX5_NIC_VPORT_CONTEXT_ALLOWED_LIST_TYPE_CURRENT_UC_MAC_ADDRESS = 0x0, -}; - struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x1f]; u8 roce_en[0x1]; From e1d7d349c69d12721c420f1fe673ce9aa462aadd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:11 +0200 Subject: [PATCH 04/18] net/mlx5: Update access functions to Query/Modify vport MAC address In preparation for SR-IOV we add here an API to enable each e-switch client (PF/VF) to configure its L2 MAC addresses and for the e-switch manager (usually the PF) to access them in order to be able to configure them into the e-switch. Therefore we now pass vport num parameter to mlx5_query_nic_vport_context, so PF can access other vports contexts. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/vport.c | 87 ++++++++++++++++--- include/linux/mlx5/vport.h | 5 +- 3 files changed, 81 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f6a8cc7876035..2ef717f98b0da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2028,7 +2028,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, netdev->dev_addr); + mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); } static void mlx5e_build_netdev(struct net_device *netdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b94177ebcf3a3..442916e987248 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -57,33 +57,98 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) } EXPORT_SYMBOL(mlx5_query_vport_state); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); +} + +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); u8 *out_addr; + int err; out = mlx5_vzalloc(outlen); if (!out) - return; + return -ENOMEM; out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); - memset(in, 0, sizeof(in)); - - MLX5_SET(query_nic_vport_context_in, in, opcode, - MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); - - memset(out, 0, outlen); - mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; ether_addr_copy(addr, &out_addr[2]); +out: kvfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); + +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + void *nic_vport_ctx; + u8 *perm_mac; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_warn(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.permanent_address, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + perm_mac = MLX5_ADDR_OF(nic_vport_context, nic_vport_ctx, + permanent_address); + + ether_addr_copy(&perm_mac[2], addr); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; } -EXPORT_SYMBOL(mlx5_query_nic_vport_mac_address); +EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 967e0fd06e897..43e82d9f54638 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,7 +36,10 @@ #include u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); -void mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u8 *addr); +int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, + u16 vport, u8 *addr); +int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, + u16 vport, u8 *addr); int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid); From e16aea2744abea612c27ee0eef606c6a6a8204de Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:12 +0200 Subject: [PATCH 05/18] net/mlx5: Introduce access functions to modify/query vport mac lists Those functions are needed to notify the upcoming L2 table and SR-IOV E-Switch(FDB) manager(PF), of the NIC vport (vf) UC/MC mac lists changes. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/vport.c | 119 ++++++++++++++++++ include/linux/mlx5/device.h | 6 + include/linux/mlx5/vport.h | 10 ++ 3 files changed, 135 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 442916e987248..986d0d364df7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -150,6 +150,125 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address); +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int max_list_size; + int req_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *list_size; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max_list_size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *list_size = req_list_size; + for (i = 0; i < req_list_size; i++) { + u8 *mac_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(addr_list[i], mac_addr); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_list); + +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = list_type == MLX5_NVPRT_LIST_TYPE_UC ? + 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(dev, log_max_current_mc_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(mac_address_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, list_type); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + u8 *curr_mac = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]) + 2; + ether_addr_copy(curr_mac, addr_list[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); + int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b473cbfa7ef1..0d2f0435a9f06 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1102,6 +1102,12 @@ enum { MLX5_FLOW_CONTEXT_DEST_TYPE_TIR = 2, }; +enum mlx5_list_type { + MLX5_NVPRT_LIST_TYPE_UC = 0x0, + MLX5_NVPRT_LIST_TYPE_MC = 0x1, + MLX5_NVPRT_LIST_TYPE_VLAN = 0x2, +}; + enum { MLX5_RQC_RQ_TYPE_MEMORY_RQ_INLINE = 0x0, MLX5_RQC_RQ_TYPE_MEMORY_RQ_RPM = 0x1, diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 43e82d9f54638..00bbec8d9527e 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -34,6 +34,7 @@ #define __MLX5_VPORT_H__ #include +#include u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, @@ -54,5 +55,14 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev, u64 *sys_image_guid); int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, u64 *node_guid); +int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, + u32 vport, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int *list_size); +int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, + enum mlx5_list_type list_type, + u8 addr_list[][ETH_ALEN], + int list_size); #endif /* __MLX5_VPORT_H__ */ From e75465148b7df7f2796c75bf98bf33f171edeb2b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:13 +0200 Subject: [PATCH 06/18] net/mlx5: Introduce access functions to modify/query vport state In preparation for SR-IOV we add here an API to enable each e-switch manager (PF) to configure its VFs link states in e-switch preparation for ethernet sriov. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- .../net/ethernet/mellanox/mlx5/core/vport.c | 61 +++++++++++++++++-- include/linux/mlx5/mlx5_ifc.h | 1 + include/linux/mlx5/vport.h | 6 +- 4 files changed, 62 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2ef717f98b0da..007e464b3e585 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -63,7 +63,7 @@ static void mlx5e_update_carrier(struct mlx5e_priv *priv) u8 port_state; port_state = mlx5_query_vport_state(mdev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT); + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) netif_carrier_on(priv->netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 986d0d364df7d..b017a7e68b28a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,26 +36,75 @@ #include #include "mlx5_core.h" -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod) +static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(query_vport_state_out)]; int err; + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; memset(in, 0, sizeof(in)); MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); MLX5_SET(query_vport_state_in, in, op_mod, opmod); + MLX5_SET(query_vport_state_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); if (err) mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); + return err; +} + +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + return MLX5_GET(query_vport_state_out, out, state); } -EXPORT_SYMBOL(mlx5_query_vport_state); +EXPORT_SYMBOL_GPL(mlx5_query_vport_state); + +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) +{ + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0}; + + _mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out)); + + return MLX5_GET(query_vport_state_out, out, admin_state); +} +EXPORT_SYMBOL(mlx5_query_vport_admin_state); + +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, opmod); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + + if (vport) + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + + MLX5_SET(modify_vport_state_in, in, admin_state, state); + + err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, + sizeof(out)); + if (err) + mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); + + return err; +} +EXPORT_SYMBOL(mlx5_modify_vport_admin_state); static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, u32 *out, int outlen) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 836cf0e43174d..655184702ea2d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2946,6 +2946,7 @@ struct mlx5_ifc_query_vport_state_out_bits { enum { MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT = 0x0, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT = 0x1, }; struct mlx5_ifc_query_vport_state_in_bits { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index 00bbec8d9527e..c1bba59488510 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -36,7 +36,11 @@ #include #include -u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod); +u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport); +u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport); +int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, + u16 vport, u8 state); int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, From d82b73186dab70d6d332dd2afdb48608be2e5230 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:14 +0200 Subject: [PATCH 07/18] net/mlx5: Introduce access functions to modify/query vport promisc mode Those functions are needed to notify the upcoming SR-IOV E-Switch(FDB) manager(PF), of the NIC vport (vf) promisc mode changes. Preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/vport.c | 62 +++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 28 +++++++-- include/linux/mlx5/vport.h | 9 +++ 3 files changed, 94 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b017a7e68b28a..68aa51df29c1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -576,3 +576,65 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev, return err; } EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid); + +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all) +{ + u32 *out; + int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + int err; + + out = kzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + if (err) + goto out; + + *promisc_uc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_uc); + *promisc_mc = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_mc); + *promisc_all = MLX5_GET(query_nic_vport_context_out, out, + nic_vport_context.promisc_all); + +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_promisc); + +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all) +{ + void *in; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + int err; + + in = mlx5_vzalloc(inlen); + if (!in) { + mlx5_core_err(mdev, "failed to allocate inbox\n"); + return -ENOMEM; + } + + MLX5_SET(modify_nic_vport_context_in, in, field_select.promisc, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_uc, promisc_uc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_mc, promisc_mc); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.promisc_all, promisc_all); + + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + + kvfree(in); + + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 655184702ea2d..2728b5f6c017e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2147,16 +2147,31 @@ struct mlx5_ifc_nic_vport_context_bits { u8 reserved_0[0x1f]; u8 roce_en[0x1]; - u8 reserved_1[0x760]; + u8 arm_change_event[0x1]; + u8 reserved_1[0x1a]; + u8 event_on_mtu[0x1]; + u8 event_on_promisc_change[0x1]; + u8 event_on_vlan_change[0x1]; + u8 event_on_mc_address_change[0x1]; + u8 event_on_uc_address_change[0x1]; - u8 reserved_2[0x5]; + u8 reserved_2[0xf0]; + + u8 mtu[0x10]; + + u8 reserved_3[0x640]; + + u8 promisc_uc[0x1]; + u8 promisc_mc[0x1]; + u8 promisc_all[0x1]; + u8 reserved_4[0x2]; u8 allowed_list_type[0x3]; - u8 reserved_3[0xc]; + u8 reserved_5[0xc]; u8 allowed_list_size[0xc]; struct mlx5_ifc_mac_address_layout_bits permanent_address; - u8 reserved_4[0x20]; + u8 reserved_6[0x20]; u8 current_uc_mac_address[0][0x40]; }; @@ -4235,7 +4250,10 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits { }; struct mlx5_ifc_modify_nic_vport_field_select_bits { - u8 reserved_0[0x1c]; + u8 reserved_0[0x19]; + u8 mtu[0x1]; + u8 change_event[0x1]; + u8 promisc[0x1]; u8 permanent_address[0x1]; u8 addresses_list[0x1]; u8 roce_en[0x1]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index c1bba59488510..dbbaed9f975ae 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -68,5 +68,14 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, enum mlx5_list_type list_type, u8 addr_list[][ETH_ALEN], int list_size); +int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev, + u32 vport, + int *promisc_uc, + int *promisc_mc, + int *promisc_all); +int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, + int promisc_uc, + int promisc_mc, + int promisc_all); #endif /* __MLX5_VPORT_H__ */ From c0046cf7b81ac55b8bf056c71918ec04edd99379 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:15 +0200 Subject: [PATCH 08/18] net/mlx5: Introduce access functions to modify/query vport vlans Those functions are needed to notify the upcoming L2 table and SR-IOV E-Switch(FDB) manager(PF), of the NIC vport (vf) vlan table changes. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/vport.c | 112 ++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 7 ++ include/linux/mlx5/vport.h | 7 ++ 3 files changed, 126 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 68aa51df29c1c..076197efea9b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -318,6 +318,118 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list); +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size) +{ + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + void *nic_vport_ctx; + int req_list_size; + int max_list_size; + int out_sz; + void *out; + int err; + int i; + + req_list_size = *size; + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + if (req_list_size > max_list_size) { + mlx5_core_warn(dev, "Requested list size (%d) > (%d) max list size\n", + req_list_size, max_list_size); + req_list_size = max_list_size; + } + + out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + req_list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(in, 0, sizeof(in)); + out = kzalloc(out_sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, + MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + + if (vport) + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + if (err) + goto out; + + nic_vport_ctx = MLX5_ADDR_OF(query_nic_vport_context_out, out, + nic_vport_context); + req_list_size = MLX5_GET(nic_vport_context, nic_vport_ctx, + allowed_list_size); + + *size = req_list_size; + for (i = 0; i < req_list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + vlans[i] = MLX5_GET(vlan_layout, vlan_addr, vlan); + } +out: + kfree(out); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_vlans); + +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size) +{ + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int max_list_size; + int in_sz; + void *in; + int err; + int i; + + max_list_size = 1 << MLX5_CAP_GEN(dev, log_max_vlan_list); + + if (list_size > max_list_size) + return -ENOSPC; + + in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + + list_size * MLX5_ST_SZ_BYTES(vlan_layout); + + memset(out, 0, sizeof(out)); + in = kzalloc(in_sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, + field_select.addresses_list, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, + nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_type, MLX5_NVPRT_LIST_TYPE_VLAN); + MLX5_SET(nic_vport_context, nic_vport_ctx, + allowed_list_size, list_size); + + for (i = 0; i < list_size; i++) { + void *vlan_addr = MLX5_ADDR_OF(nic_vport_context, + nic_vport_ctx, + current_uc_mac_address[i]); + MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); + } + + err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + kfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans); + int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, u8 port_num, u16 vf_num, u16 gid_index, union ib_gid *gid) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 2728b5f6c017e..39487d0c305d4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -910,6 +910,13 @@ struct mlx5_ifc_mac_address_layout_bits { u8 mac_addr_31_0[0x20]; }; +struct mlx5_ifc_vlan_layout_bits { + u8 reserved_0[0x14]; + u8 vlan[0x0c]; + + u8 reserved_1[0x20]; +}; + struct mlx5_ifc_cong_control_r_roce_ecn_np_bits { u8 reserved_0[0xa0]; diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index dbbaed9f975ae..638f2ca7a527c 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -77,5 +77,12 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev, int promisc_uc, int promisc_mc, int promisc_all); +int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, + u32 vport, + u16 vlans[], + int *size); +int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, + u16 vlans[], + int list_size); #endif /* __MLX5_VPORT_H__ */ From 5e55da1d5a5f5ffd2038def3bfdcb2f8f3a0418a Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:16 +0200 Subject: [PATCH 09/18] net/mlx5e: Write UC/MC list and promisc mode into vport context Each Vport/vNIC must notify underlying e-Switch layer for UC/MC list and promisc mode updates, in-order to update l2 tables and SR-IOV FDB tables. We do that at set_rx_mode ndo. preperation for ethernet-SRIOV and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../mellanox/mlx5/core/en_flow_table.c | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c index 22d603f782733..fb02f6c0891f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -671,6 +671,91 @@ static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) netif_addr_unlock_bh(netdev); } +static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, + u8 addr_array[][ETH_ALEN], int size) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct net_device *ndev = priv->netdev; + struct mlx5e_eth_addr_hash_node *hn; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int i = 0; + int hi; + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + + if (is_uc) /* Make sure our own address is pushed first */ + ether_addr_copy(addr_array[i++], ndev->dev_addr); + else if (priv->eth_addr.broadcast_enabled) + ether_addr_copy(addr_array[i++], ndev->broadcast); + + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { + if (ether_addr_equal(ndev->dev_addr, hn->ai.addr)) + continue; + if (i >= size) + break; + ether_addr_copy(addr_array[i++], hn->ai.addr); + } +} + +static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, + int list_type) +{ + bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); + struct mlx5e_eth_addr_hash_node *hn; + u8 (*addr_array)[ETH_ALEN] = NULL; + struct hlist_head *addr_list; + struct hlist_node *tmp; + int max_size; + int size; + int err; + int hi; + + size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + max_size = is_uc ? + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : + 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); + + addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) + size++; + + if (size > max_size) { + netdev_warn(priv->netdev, + "netdev %s list size (%d) > (%d) max vport list size, some addresses will be dropped\n", + is_uc ? "UC" : "MC", size, max_size); + size = max_size; + } + + if (size) { + addr_array = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!addr_array) { + err = -ENOMEM; + goto out; + } + mlx5e_fill_addr_array(priv, list_type, addr_array, size); + } + + err = mlx5_modify_nic_vport_mac_list(priv->mdev, list_type, addr_array, size); +out: + if (err) + netdev_err(priv->netdev, + "Failed to modify vport %s list err(%d)\n", + is_uc ? "UC" : "MC", err); + kfree(addr_array); +} + +static void mlx5e_vport_context_update(struct mlx5e_priv *priv) +{ + struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); + mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); + mlx5_modify_nic_vport_promisc(priv->mdev, 0, + ea->allmulti_enabled, + ea->promisc_enabled); +} + static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) { struct mlx5e_eth_addr_hash_node *hn; @@ -748,6 +833,8 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) ea->promisc_enabled = promisc_enabled; ea->allmulti_enabled = allmulti_enabled; ea->broadcast_enabled = broadcast_enabled; + + mlx5e_vport_context_update(priv); } void mlx5e_init_eth_addr(struct mlx5e_priv *priv) From aad9e6e41e5e595528e316871bbc5be78a8a1eb4 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:17 +0200 Subject: [PATCH 10/18] net/mlx5e: Write vlan list into vport context Each Vport/vNIC must notify underlying e-Switch layer for vlan table changes in-order to update SR-IOV FDB tables. We do that at vlan_rx_add_vid and vlan_rx_kill_vid ndos. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../mellanox/mlx5/core/en_flow_table.c | 52 +++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 69f1c1a412b4e..89313d46952de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -465,6 +465,7 @@ enum { }; struct mlx5e_vlan_db { + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; u32 active_vlans_ft_ix[VLAN_N_VID]; u32 untagged_rule_ft_ix; u32 any_vlan_rule_ft_ix; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c index fb02f6c0891f3..5b93c9c6e341d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_flow_table.c @@ -502,6 +502,49 @@ static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, return err; } +static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) +{ + struct net_device *ndev = priv->netdev; + int max_list_size; + int list_size; + u16 *vlans; + int vlan; + int err; + int i; + + list_size = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + list_size++; + + max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); + + if (list_size > max_list_size) { + netdev_warn(ndev, + "netdev vlans list size (%d) > (%d) max vport list size, some vlans will be dropped\n", + list_size, max_list_size); + list_size = max_list_size; + } + + vlans = kcalloc(list_size, sizeof(*vlans), GFP_KERNEL); + if (!vlans) + return -ENOMEM; + + i = 0; + for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + if (i >= list_size) + break; + vlans[i++] = vlan; + } + + err = mlx5_modify_nic_vport_vlans(priv->mdev, vlans, list_size); + if (err) + netdev_err(ndev, "Failed to modify vport vlans list err(%d)\n", + err); + + kfree(vlans); + return err; +} + enum mlx5e_vlan_rule_type { MLX5E_VLAN_RULE_TYPE_UNTAGGED, MLX5E_VLAN_RULE_TYPE_ANY_VID, @@ -552,6 +595,10 @@ static int mlx5e_add_vlan_rule(struct mlx5e_priv *priv, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ + err = mlx5e_vport_context_update_vlans(priv); + if (err) + goto add_vlan_rule_out; + ft_ix = &priv->vlan.active_vlans_ft_ix[vid]; MLX5_SET(fte_match_param, match_value, outer_headers.vlan_tag, 1); @@ -588,6 +635,7 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5_del_flow_table_entry(priv->ft.vlan, priv->vlan.active_vlans_ft_ix[vid]); + mlx5e_vport_context_update_vlans(priv); break; } } @@ -619,6 +667,8 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); + set_bit(vid, priv->vlan.active_vlans); + return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); } @@ -627,6 +677,8 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); + clear_bit(vid, priv->vlan.active_vlans); + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); return 0; From 073bb189a41d7bbad509b576a690611c46c4858f Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:18 +0200 Subject: [PATCH 11/18] net/mlx5: Introducing E-Switch and l2 table E-Switch is the software entity that represents and manages ConnectX4 inter-HCA ethernet l2 switching. E-Switch has its own Virtual Ports, each Vport/vNIC/VF can be connected to the device through a vport of an e-switch. Each e-switch is managed by one vNIC identified by HCA_CAP.vport_group_manager (usually it is the PF/vport[0]), and its main responsibility is to forward each packet to the right vport. e-Switch needs to manage its own l2-table and FDB tables. L2 table is a flow table that is managed by FW, it is needed for Multi-host (Multi PF) configuration for inter HCA switching between PFs. FDB table is a flow table that is totally managed by e-Switch driver, its main responsibility is to switch packets between e-Swtich internal vports and uplink vport that belong to the same. This patch introduces only e-Swtich l2 table management, FDB managemnt will come later when ethernet SRIOV/VFs will be enabled. preperation for ethernet sriov and l2 table management. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 13 + .../net/ethernet/mellanox/mlx5/core/eswitch.c | 500 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 122 +++++ .../net/ethernet/mellanox/mlx5/core/main.c | 18 + include/linux/mlx5/device.h | 8 + include/linux/mlx5/driver.h | 4 + 7 files changed, 666 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 4d51039115279..a0755919ccaf0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o \ +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o flow_table.o eswitch.o \ en_main.o en_flow_table.o en_ethtool.o en_tx.o en_rx.o \ en_txrx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 713ead583347f..23c244a7e5d73 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -35,6 +35,9 @@ #include #include #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -287,6 +290,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq) break; #endif +#ifdef CONFIG_MLX5_CORE_EN + case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: + mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); + break; +#endif default: mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n", eqe->type, eq->eqn); @@ -459,6 +467,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, pg)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT); + if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && + MLX5_CAP_GEN(dev, vport_group_manager) && + mlx5_core_is_pf(dev)) + async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c new file mode 100644 index 0000000000000..1f2f804bde3e7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" +#include "eswitch.h" + +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) \ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + +enum { + MLX5_ACTION_NONE = 0, + MLX5_ACTION_ADD = 1, + MLX5_ACTION_DEL = 2, +}; + +/* HW UC L2 table hash node */ +struct mlx5_uc_l2addr { + struct l2addr_node node; + u8 action; + u32 table_index; + u32 vport; +}; + +/* Vport UC L2 table hash node */ +struct mlx5_vport_addr { + struct l2addr_node node; + u8 action; +}; + +enum { + UC_ADDR_CHANGE = BIT(0), + MC_ADDR_CHANGE = BIT(1), +}; + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, int vport, + u32 events_mask) +{ + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + void *nic_vport_ctx; + int err; + + memset(out, 0, sizeof(out)); + memset(in, 0, sizeof(in)); + + MLX5_SET(modify_nic_vport_context_in, in, + opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + + MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1); + + if (events_mask & UC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_uc_address_change, 1); + if (events_mask & MC_ADDR_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_mc_address_change, 1); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) + goto ex; + err = mlx5_cmd_status_to_err_v2(out); + if (err) + goto ex; + return 0; +ex: + return err; +} + +/* HW L2 Table (MPFS) management */ +static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, + u8 *mac, u8 vlan_valid, u16 vlan) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u8 *in_mac_addr; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); + MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(delete_l2_table_entry_in, in, opcode, + MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); + if (*ix >= l2_table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2_table->bitmap); + + return err; +} + +static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) +{ + __clear_bit(ix, l2_table->bitmap); +} + +static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, + u8 vlan_valid, u16 vlan, + u32 *index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + int err; + + err = alloc_l2_table_index(l2_table, index); + if (err) + return err; + + err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); + if (err) + free_l2_table_index(l2_table, *index); + + return err; +} + +static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) +{ + struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; + + del_l2_table_entry_cmd(dev, index); + free_l2_table_index(l2_table, index); +} + +/* SW E-Switch L2 Table management */ +static int l2_table_addr_add(struct mlx5_eswitch *esw, + u8 mac[ETH_ALEN], u32 vport) +{ + struct hlist_head *hash; + struct mlx5_uc_l2addr *addr; + int err; + + hash = esw->l2_table.l2_hash; + addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); + if (addr) { + esw_warn(esw->dev, + "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", + mac, vport, addr->vport); + return -EEXIST; + } + + addr = l2addr_hash_add(hash, mac, struct mlx5_uc_l2addr, + GFP_KERNEL); + if (!addr) + return -ENOMEM; + + addr->vport = vport; + addr->action = MLX5_ACTION_NONE; + err = set_l2_table_entry(esw->dev, mac, 0, 0, &addr->table_index); + if (err) + l2addr_hash_del(addr); + else + esw_debug(esw->dev, "\tADDED L2 MAC: vport[%d] %pM index:%d\n", + vport, addr->node.addr, addr->table_index); + return err; +} + +static int l2_table_addr_del(struct mlx5_eswitch *esw, + u8 mac[ETH_ALEN], u32 vport) +{ + struct hlist_head *hash; + struct mlx5_uc_l2addr *addr; + + hash = esw->l2_table.l2_hash; + addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); + if (!addr || addr->vport != vport) { + esw_warn(esw->dev, "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + + esw_debug(esw->dev, "\tDELETE L2 MAC: vport[%d] %pM index:%d\n", + vport, addr->node.addr, addr->table_index); + del_l2_table_entry(esw->dev, addr->table_index); + l2addr_hash_del(addr); + return 0; +} + +/* E-Switch vport uc list management */ + +/* Apply vport uc list to HW l2 table */ +static void esw_apply_vport_uc_list(struct mlx5_eswitch *esw, + u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport_addr *addr; + struct l2addr_node *node; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + hash = vport->uc_list; + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct mlx5_vport_addr, node); + switch (addr->action) { + case MLX5_ACTION_ADD: + l2_table_addr_add(esw, addr->node.addr, vport_num); + addr->action = MLX5_ACTION_NONE; + break; + case MLX5_ACTION_DEL: + l2_table_addr_del(esw, addr->node.addr, vport_num); + l2addr_hash_del(addr); + break; + } + } +} + +/* Sync vport uc list from vport context */ +static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, + u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct mlx5_vport_addr *addr; + struct l2addr_node *node; + u8 (*mac_list)[ETH_ALEN]; + struct hlist_head *hash; + struct hlist_node *tmp; + int size; + int err; + int hi; + int i; + + size = MLX5_MAX_UC_PER_VPORT(esw->dev); + + mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); + if (!mac_list) + return; + + hash = vport->uc_list; + + for_each_l2hash_node(node, tmp, hash, hi) { + addr = container_of(node, struct mlx5_vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, + MLX5_NVPRT_LIST_TYPE_UC, + mac_list, &size); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update UC list size (%d)\n", + vport_num, size); + + for (i = 0; i < size; i++) { + if (!is_valid_ether_addr(mac_list[i])) + continue; + + addr = l2addr_hash_find(hash, mac_list[i], + struct mlx5_vport_addr); + if (addr) { + addr->action = MLX5_ACTION_NONE; + continue; + } + + addr = l2addr_hash_add(hash, mac_list[i], + struct mlx5_vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add MAC(%pM) to vport[%d] DB\n", + mac_list[i], vport_num); + continue; + } + addr->action = MLX5_ACTION_ADD; + } + kfree(mac_list); +} + +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_core_dev *dev = vport->dev; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + + if (!is_valid_ether_addr(mac)) + goto out; + + esw_update_vport_uc_list(dev->priv.eswitch, vport->vport); + esw_apply_vport_uc_list(dev->priv.eswitch, vport->vport); + +out: + if (vport->enabled) + arm_vport_context_events_cmd(dev, vport->vport, + UC_ADDR_CHANGE); +} + +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = true; + spin_unlock_irqrestore(&vport->lock, flags); + + arm_vport_context_events_cmd(esw->dev, vport_num, UC_ADDR_CHANGE); +} + +static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + unsigned long flags; + + if (!vport->enabled) + return; + + /* Mark this vport as disabled to discard new events */ + spin_lock_irqsave(&vport->lock, flags); + vport->enabled = false; + spin_unlock_irqrestore(&vport->lock, flags); + + /* Wait for current already scheduled events to complete */ + flush_workqueue(esw->work_queue); + + /* Disable events from this vport */ + arm_vport_context_events_cmd(esw->dev, vport->vport, 0); +} + +/* Public E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev) +{ + int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + struct mlx5_eswitch *esw; + int vport_num; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + esw_info(dev, + "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", + total_vports, l2_table_size, + MLX5_MAX_UC_PER_VPORT(dev), + MLX5_MAX_MC_PER_VPORT(dev)); + + esw = kzalloc(sizeof(*esw), GFP_KERNEL); + if (!esw) + return -ENOMEM; + + esw->dev = dev; + + esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!esw->l2_table.bitmap) { + err = -ENOMEM; + goto abort; + } + esw->l2_table.size = l2_table_size; + + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); + if (!esw->work_queue) { + err = -ENOMEM; + goto abort; + } + + esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport), + GFP_KERNEL); + if (!esw->vports) { + err = -ENOMEM; + goto abort; + } + + esw->total_vports = total_vports; + for (vport_num = 0; vport_num < total_vports; vport_num++) { + struct mlx5_vport *vport = &esw->vports[vport_num]; + + vport->vport = vport_num; + vport->dev = dev; + INIT_WORK(&vport->vport_change_handler, + esw_vport_change_handler); + spin_lock_init(&vport->lock); + } + + dev->priv.eswitch = esw; + + esw_enable_vport(esw, 0); + /* VF Vports will be enabled when SRIOV is enabled */ + return 0; +abort: + if (esw->work_queue) + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); + return err; +} + +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "cleanup\n"); + esw_disable_vport(esw, 0); + + esw->dev->priv.eswitch = NULL; + destroy_workqueue(esw->work_queue); + kfree(esw->l2_table.bitmap); + kfree(esw->vports); + kfree(esw); +} + +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) +{ + struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change; + u16 vport_num = be16_to_cpu(vc_eqe->vport_num); + struct mlx5_vport *vport; + + if (!esw) { + pr_warn("MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", + vport_num); + return; + } + + vport = &esw->vports[vport_num]; + spin_lock(&vport->lock); + if (vport->enabled) + queue_work(esw->work_queue, &vport->vport_change_handler); + spin_unlock(&vport->lock); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h new file mode 100644 index 0000000000000..0c41f26578243 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2015, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_ESWITCH_H__ +#define __MLX5_ESWITCH_H__ + +#include + +#define MLX5_MAX_UC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) + +#define MLX5_MAX_MC_PER_VPORT(dev) \ + (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) + +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +/* L2 -mac address based- hash helpers */ +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &hash[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&ptr->node.hlist); \ + kfree(ptr); \ +}) + +struct mlx5_vport { + struct mlx5_core_dev *dev; + int vport; + struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct work_struct vport_change_handler; + + /* This spinlock protects access to vport data, between + * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" + * once vport marked as disabled new interrupts are discarded. + */ + spinlock_t lock; /* vport events sync */ + bool enabled; +}; + +struct mlx5_l2_table { + struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; + u32 size; + unsigned long *bitmap; +}; + +struct mlx5_eswitch { + struct mlx5_core_dev *dev; + struct mlx5_l2_table l2_table; + struct workqueue_struct *work_queue; + struct mlx5_vport *vports; + int total_vports; +}; + +/* E-Switch API */ +int mlx5_eswitch_init(struct mlx5_core_dev *dev); +void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); + +#endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 66e2b37cfbbfe..c6de3240f76f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -49,6 +49,9 @@ #include #include #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif MODULE_AUTHOR("Eli Cohen "); MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver"); @@ -1052,6 +1055,14 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_init_srq_table(dev); mlx5_init_mr_table(dev); +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "eswitch init failed %d\n", err); + goto err_reg_dev; + } +#endif + err = mlx5_sriov_init(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); @@ -1078,6 +1089,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) if (mlx5_sriov_cleanup(dev)) dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif err_reg_dev: mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); @@ -1147,6 +1161,10 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out; } mlx5_unregister_device(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + mlx5_cleanup_mr_table(dev); mlx5_cleanup_srq_table(dev); mlx5_cleanup_qp_table(dev); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0d2f0435a9f06..90a4cb6dc4cb1 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -251,6 +251,7 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_REQUEST = 0xb, MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, + MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, }; enum { @@ -520,6 +521,12 @@ struct mlx5_eqe_page_fault { __be32 flags_qpn; } __packed; +struct mlx5_eqe_vport_change { + u8 rsvd0[2]; + __be16 vport_num; + __be32 rsvd1[6]; +} __packed; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -532,6 +539,7 @@ union ev_data { struct mlx5_eqe_stall_vl stall_vl; struct mlx5_eqe_page_req req_pages; struct mlx5_eqe_page_fault page_fault; + struct mlx5_eqe_vport_change vport_change; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index efebb87163c8b..ac098b6b97bf9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -441,6 +441,8 @@ struct mlx5_irq_info { char name[MLX5_MAX_IRQ_NAME]; }; +struct mlx5_eswitch; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -496,6 +498,8 @@ struct mlx5_priv { struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; + + struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; unsigned long pci_dev_data; }; From 495716b191f607b2cb2175f7499966daef79f663 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:19 +0200 Subject: [PATCH 12/18] net/mlx5: E-Switch, Introduce FDB hardware capabilities Define needed hardware structures and capabilities needed for E-Switch FDB flow tables and read them on driver load. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 13 +++++++++++++ include/linux/mlx5/device.h | 15 +++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 13 +++++++++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 9335e5ae18cce..bf6e3dfcef511 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -160,6 +160,19 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) if (err) return err; } + + if (MLX5_CAP_GEN(dev, vport_group_manager) && + MLX5_CAP_GEN(dev, eswitch_flow_table)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 90a4cb6dc4cb1..bce9caed1eed8 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1138,6 +1138,7 @@ enum mlx5_cap_type { MLX5_CAP_IPOIB_OFFLOADS, MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, + MLX5_CAP_ESWITCH_FLOW_TABLE, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1175,6 +1176,20 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \ MLX5_GET(flow_table_nic_cap, mdev->hca_caps_max[MLX5_CAP_FLOW_TABLE], cap) +#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ + MLX5_GET(flow_table_eswitch_cap, \ + mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + +#define MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, cap) \ + MLX5_GET(flow_table_eswitch_cap, \ + mdev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) + +#define MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE(mdev, flow_table_properties_nic_esw_fdb.cap) + +#define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ + MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 39487d0c305d4..ae7c08adba4a0 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -447,6 +447,18 @@ struct mlx5_ifc_flow_table_nic_cap_bits { u8 reserved_3[0x7200]; }; +struct mlx5_ifc_flow_table_eswitch_cap_bits { + u8 reserved_0[0x200]; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_ingress; + + struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress; + + u8 reserved_1[0x7800]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -1846,6 +1858,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_roce_cap_bits roce_cap; struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; + struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; u8 reserved_0[0x8000]; }; From 81848731ff4070a3e4136efa6a99d507177a53fe Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:20 +0200 Subject: [PATCH 13/18] net/mlx5: E-Switch, Add SR-IOV (FDB) support Enabling E-Switch SRIOV for nvfs+1 vports. Create E-Switch FDB for L2 UC/MC mac steering between VFs/PF and external vport (Uplink). FDB contains forwarding rules such as: UC MAC0 -> vport0(PF). UC MAC1 -> vport1. UC MAC2 -> vport2. MC MACX -> vport0, vport2, Uplink. MC MACY -> vport1, Uplink. For unmatched traffic FDB has the following default rules: Unmached Traffic (src vport != Uplink) -> Uplink. Unmached Traffic (src vport == Uplink) -> vport0(PF). FDB rules population: Each NIC vport (VF) will notify E-Switch manager of its UC/MC vport context changes via modify vport context command, which will be translated to an event that will be handled by E-Switch manager (PF) which will update FDB table accordingly. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 682 ++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 25 + .../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + .../net/ethernet/mellanox/mlx5/core/sriov.c | 14 +- include/linux/mlx5/device.h | 6 + include/linux/mlx5/flow_table.h | 9 + include/linux/mlx5/mlx5_ifc.h | 7 +- 7 files changed, 661 insertions(+), 83 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1f2f804bde3e7..6fa6f013d2c5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -34,9 +34,12 @@ #include #include #include +#include #include "mlx5_core.h" #include "eswitch.h" +#define UPLINK_VPORT 0xFFFF + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ @@ -54,18 +57,26 @@ enum { MLX5_ACTION_DEL = 2, }; -/* HW UC L2 table hash node */ -struct mlx5_uc_l2addr { +/* E-Switch UC L2 table hash node */ +struct esw_uc_addr { struct l2addr_node node; - u8 action; u32 table_index; u32 vport; }; -/* Vport UC L2 table hash node */ -struct mlx5_vport_addr { - struct l2addr_node node; - u8 action; +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; +}; + +/* Vport UC/MC hash node */ +struct vport_addr { + struct l2addr_node node; + u8 action; + u32 vport; + struct mlx5_flow_rule *flow_rule; /* SRIOV only */ }; enum { @@ -73,7 +84,11 @@ enum { MC_ADDR_CHANGE = BIT(1), }; -static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, int vport, +/* Vport context events */ +#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ + MC_ADDR_CHANGE) + +static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; @@ -196,97 +211,492 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) free_l2_table_index(l2_table, index); } -/* SW E-Switch L2 Table management */ -static int l2_table_addr_add(struct mlx5_eswitch *esw, - u8 mac[ETH_ALEN], u32 vport) +/* E-Switch FDB flow steering */ +struct dest_node { + struct list_head list; + struct mlx5_flow_destination dest; +}; + +static int _mlx5_flow_rule_apply(struct mlx5_flow_rule *fr) { - struct hlist_head *hash; - struct mlx5_uc_l2addr *addr; + bool was_valid = fr->valid; + struct dest_node *dest_n; + u32 dest_list_size = 0; + void *in_match_value; + u32 *flow_context; + u32 flow_index; int err; + int i; + + if (list_empty(&fr->dest_list)) { + if (fr->valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->valid = false; + return 0; + } + + list_for_each_entry(dest_n, &fr->dest_list, list) + dest_list_size++; - hash = esw->l2_table.l2_hash; - addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); - if (addr) { + flow_context = mlx5_vzalloc(MLX5_ST_SZ_BYTES(flow_context) + + MLX5_ST_SZ_BYTES(dest_format_struct) * + dest_list_size); + if (!flow_context) + return -ENOMEM; + + MLX5_SET(flow_context, flow_context, flow_tag, fr->flow_tag); + MLX5_SET(flow_context, flow_context, action, fr->action); + MLX5_SET(flow_context, flow_context, destination_list_size, + dest_list_size); + + i = 0; + list_for_each_entry(dest_n, &fr->dest_list, list) { + void *dest_addr = MLX5_ADDR_OF(flow_context, flow_context, + destination[i++]); + + MLX5_SET(dest_format_struct, dest_addr, destination_type, + dest_n->dest.type); + MLX5_SET(dest_format_struct, dest_addr, destination_id, + dest_n->dest.vport_num); + } + + in_match_value = MLX5_ADDR_OF(flow_context, flow_context, match_value); + memcpy(in_match_value, fr->match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + + err = mlx5_add_flow_table_entry(fr->ft, fr->match_criteria_enable, + fr->match_criteria, flow_context, + &flow_index); + if (!err) { + if (was_valid) + mlx5_del_flow_table_entry(fr->ft, fr->fi); + fr->fi = flow_index; + fr->valid = true; + } + kfree(flow_context); + return err; +} + +static int mlx5_flow_rule_add_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *new_dest) +{ + struct dest_node *dest_n; + int err; + + dest_n = kzalloc(sizeof(*dest_n), GFP_KERNEL); + if (!dest_n) + return -ENOMEM; + + memcpy(&dest_n->dest, new_dest, sizeof(dest_n->dest)); + mutex_lock(&fr->mutex); + list_add(&dest_n->list, &fr->dest_list); + err = _mlx5_flow_rule_apply(fr); + if (err) { + list_del(&dest_n->list); + kfree(dest_n); + } + mutex_unlock(&fr->mutex); + return err; +} + +static int mlx5_flow_rule_del_dest(struct mlx5_flow_rule *fr, + struct mlx5_flow_destination *dest) +{ + struct dest_node *dest_n; + struct dest_node *n; + int err; + + mutex_lock(&fr->mutex); + list_for_each_entry_safe(dest_n, n, &fr->dest_list, list) { + if (dest->vport_num == dest_n->dest.vport_num) + goto found; + } + mutex_unlock(&fr->mutex); + return -ENOENT; + +found: + list_del(&dest_n->list); + err = _mlx5_flow_rule_apply(fr); + mutex_unlock(&fr->mutex); + kfree(dest_n); + + return err; +} + +static struct mlx5_flow_rule *find_fr(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_value) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *dmac_v; + + dmac_v = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + + /* UNICAST FULL MATCH */ + if (!is_multicast_ether_addr(dmac_v)) + return NULL; + + /* MULTICAST FULL MATCH */ + esw_mc = l2addr_hash_find(hash, dmac_v, struct esw_mc_addr); + + return esw_mc ? esw_mc->uplink_rule : NULL; +} + +static struct mlx5_flow_rule *alloc_fr(void *ft, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag) +{ + struct mlx5_flow_rule *fr = kzalloc(sizeof(*fr), GFP_KERNEL); + + if (!fr) + return NULL; + + fr->match_criteria = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + fr->match_value = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!fr->match_criteria || !fr->match_value) { + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); + return NULL; + } + + memcpy(fr->match_criteria, match_criteria, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(fr->match_value, match_value, MLX5_ST_SZ_BYTES(fte_match_param)); + fr->match_criteria_enable = match_criteria_enable; + fr->flow_tag = flow_tag; + fr->action = action; + + mutex_init(&fr->mutex); + INIT_LIST_HEAD(&fr->dest_list); + atomic_set(&fr->refcount, 0); + fr->ft = ft; + return fr; +} + +static void deref_fr(struct mlx5_flow_rule *fr) +{ + if (!atomic_dec_and_test(&fr->refcount)) + return; + + kfree(fr->match_criteria); + kfree(fr->match_value); + kfree(fr); +} + +static struct mlx5_flow_rule * +mlx5_add_flow_rule(struct mlx5_eswitch *esw, + u8 match_criteria_enable, + u32 *match_criteria, + u32 *match_value, + u32 action, + u32 flow_tag, + struct mlx5_flow_destination *dest) +{ + struct mlx5_flow_rule *fr; + int err; + + fr = find_fr(esw, match_criteria_enable, match_value); + fr = fr ? fr : alloc_fr(esw->fdb_table.fdb, match_criteria_enable, match_criteria, + match_value, action, flow_tag); + if (!fr) + return NULL; + + atomic_inc(&fr->refcount); + + err = mlx5_flow_rule_add_dest(fr, dest); + if (err) { + deref_fr(fr); + return NULL; + } + + return fr; +} + +static void mlx5_del_flow_rule(struct mlx5_flow_rule *fr, u32 vport) +{ + struct mlx5_flow_destination dest; + + dest.vport_num = vport; + mlx5_flow_rule_del_dest(fr, &dest); + deref_fr(fr); +} + +/* E-Switch FDB */ +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + int match_header = MLX5_MATCH_OUTER_HEADERS; + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v; + u32 *match_c; + u8 *dmac_v; + u8 *dmac_c; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + pr_warn("FDB: Failed to alloc match parameters\n"); + goto out; + } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers.dmac_47_16); + dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers.dmac_47_16); + + ether_addr_copy(dmac_v, mac); + /* Match criteria mask */ + memset(dmac_c, 0xff, 6); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + esw_debug(esw->dev, + "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n", + dmac_v, dmac_c, vport); + flow_rule = + mlx5_add_flow_rule(esw, + match_header, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR_OR_NULL(flow_rule)) { + pr_warn( + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + flow_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + +static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table_group g; + struct mlx5_flow_table *fdb; + u8 *dmac; + + esw_debug(dev, "Create FDB log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + memset(&g, 0, sizeof(g)); + /* UC MC Full match rules*/ + g.log_sz = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + g.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + dmac = MLX5_ADDR_OF(fte_match_param, g.match_criteria, + outer_headers.dmac_47_16); + /* Match criteria mask */ + memset(dmac, 0xff, 6); + + fdb = mlx5_create_flow_table(dev, 0, + MLX5_FLOW_TABLE_TYPE_ESWITCH, + 1, &g); + if (fdb) + esw_debug(dev, "ESW: FDB Table created fdb->id %d\n", mlx5_get_flow_table_id(fdb)); + else + esw_warn(dev, "ESW: Failed to create FDB Table\n"); + + esw->fdb_table.fdb = fdb; + return fdb ? 0 : -ENOMEM; +} + +static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy FDB Table fdb(%d)\n", + mlx5_get_flow_table_id(esw->fdb_table.fdb)); + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; +} + +/* E-Switch vport UC/MC lists management */ +typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, + struct vport_addr *vaddr); + +static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + int err; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (esw_uc) { esw_warn(esw->dev, "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", - mac, vport, addr->vport); + mac, vport, esw_uc->vport); return -EEXIST; } - addr = l2addr_hash_add(hash, mac, struct mlx5_uc_l2addr, - GFP_KERNEL); - if (!addr) + esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); + if (!esw_uc) return -ENOMEM; + esw_uc->vport = vport; - addr->vport = vport; - addr->action = MLX5_ACTION_NONE; - err = set_l2_table_entry(esw->dev, mac, 0, 0, &addr->table_index); + err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); if (err) - l2addr_hash_del(addr); - else - esw_debug(esw->dev, "\tADDED L2 MAC: vport[%d] %pM index:%d\n", - vport, addr->node.addr, addr->table_index); + goto abort; + + if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + return err; +abort: + l2addr_hash_del(esw_uc); return err; } -static int l2_table_addr_del(struct mlx5_eswitch *esw, - u8 mac[ETH_ALEN], u32 vport) +static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash; - struct mlx5_uc_l2addr *addr; + struct hlist_head *hash = esw->l2_table.l2_hash; + struct esw_uc_addr *esw_uc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); + if (!esw_uc || esw_uc->vport != vport) { + esw_debug(esw->dev, + "MAC(%pM) doesn't belong to vport (%d)\n", + mac, vport); + return -EINVAL; + } + esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", + vport, mac, esw_uc->table_index, vaddr->flow_rule); + + del_l2_table_entry(esw->dev, esw_uc->table_index); + + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + l2addr_hash_del(esw_uc); + return 0; +} + +static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; + + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (esw_mc) + goto add; + + esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL); + if (!esw_mc) + return -ENOMEM; + + esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ + esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); +add: + esw_mc->refcnt++; + /* Forward MC MAC to vport */ + vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); + esw_debug(esw->dev, + "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, + esw_mc->refcnt, esw_mc->uplink_rule); + return 0; +} + +static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) +{ + struct hlist_head *hash = esw->mc_table; + struct esw_mc_addr *esw_mc; + u8 *mac = vaddr->node.addr; + u32 vport = vaddr->vport; - hash = esw->l2_table.l2_hash; - addr = l2addr_hash_find(hash, mac, struct mlx5_uc_l2addr); - if (!addr || addr->vport != vport) { - esw_warn(esw->dev, "MAC(%pM) doesn't belong to vport (%d)\n", + if (!esw->fdb_table.fdb) + return 0; + + esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to find eswitch MC addr for MAC(%pM) vport(%d)", mac, vport); return -EINVAL; } + esw_debug(esw->dev, + "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n", + vport, mac, vaddr->flow_rule, esw_mc->refcnt, + esw_mc->uplink_rule); - esw_debug(esw->dev, "\tDELETE L2 MAC: vport[%d] %pM index:%d\n", - vport, addr->node.addr, addr->table_index); - del_l2_table_entry(esw->dev, addr->table_index); - l2addr_hash_del(addr); + if (vaddr->flow_rule) + mlx5_del_flow_rule(vaddr->flow_rule, vport); + vaddr->flow_rule = NULL; + + if (--esw_mc->refcnt) + return 0; + + if (esw_mc->uplink_rule) + mlx5_del_flow_rule(esw_mc->uplink_rule, UPLINK_VPORT); + + l2addr_hash_del(esw_mc); return 0; } -/* E-Switch vport uc list management */ - -/* Apply vport uc list to HW l2 table */ -static void esw_apply_vport_uc_list(struct mlx5_eswitch *esw, - u32 vport_num) +/* Apply vport UC/MC list to HW l2 table and FDB table */ +static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; - struct mlx5_vport_addr *addr; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; + vport_addr_action vport_addr_add; + vport_addr_action vport_addr_del; + struct vport_addr *addr; struct l2addr_node *node; struct hlist_head *hash; struct hlist_node *tmp; int hi; - hash = vport->uc_list; + vport_addr_add = is_uc ? esw_add_uc_addr : + esw_add_mc_addr; + vport_addr_del = is_uc ? esw_del_uc_addr : + esw_del_mc_addr; + + hash = is_uc ? vport->uc_list : vport->mc_list; for_each_l2hash_node(node, tmp, hash, hi) { - addr = container_of(node, struct mlx5_vport_addr, node); + addr = container_of(node, struct vport_addr, node); switch (addr->action) { case MLX5_ACTION_ADD: - l2_table_addr_add(esw, addr->node.addr, vport_num); + vport_addr_add(esw, addr); addr->action = MLX5_ACTION_NONE; break; case MLX5_ACTION_DEL: - l2_table_addr_del(esw, addr->node.addr, vport_num); + vport_addr_del(esw, addr); l2addr_hash_del(addr); break; } } } -/* Sync vport uc list from vport context */ -static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, - u32 vport_num) +/* Sync vport UC/MC list from vport context */ +static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, + u32 vport_num, int list_type) { struct mlx5_vport *vport = &esw->vports[vport_num]; - struct mlx5_vport_addr *addr; - struct l2addr_node *node; + bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC; u8 (*mac_list)[ETH_ALEN]; + struct l2addr_node *node; + struct vport_addr *addr; struct hlist_head *hash; struct hlist_node *tmp; int size; @@ -294,40 +704,41 @@ static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, int hi; int i; - size = MLX5_MAX_UC_PER_VPORT(esw->dev); + size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) : + MLX5_MAX_MC_PER_VPORT(esw->dev); mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL); if (!mac_list) return; - hash = vport->uc_list; + hash = is_uc ? vport->uc_list : vport->mc_list; for_each_l2hash_node(node, tmp, hash, hi) { - addr = container_of(node, struct mlx5_vport_addr, node); + addr = container_of(node, struct vport_addr, node); addr->action = MLX5_ACTION_DEL; } - err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, - MLX5_NVPRT_LIST_TYPE_UC, + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, mac_list, &size); if (err) return; - esw_debug(esw->dev, "vport[%d] context update UC list size (%d)\n", - vport_num, size); + esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", + vport_num, is_uc ? "UC" : "MC", size); for (i = 0; i < size; i++) { - if (!is_valid_ether_addr(mac_list[i])) + if (is_uc && !is_valid_ether_addr(mac_list[i])) + continue; + + if (!is_uc && !is_multicast_ether_addr(mac_list[i])) continue; - addr = l2addr_hash_find(hash, mac_list[i], - struct mlx5_vport_addr); + addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); if (addr) { addr->action = MLX5_ACTION_NONE; continue; } - addr = l2addr_hash_add(hash, mac_list[i], - struct mlx5_vport_addr, + addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr, GFP_KERNEL); if (!addr) { esw_warn(esw->dev, @@ -335,6 +746,7 @@ static void esw_update_vport_uc_list(struct mlx5_eswitch *esw, mac_list[i], vport_num); continue; } + addr->vport = vport_num; addr->action = MLX5_ACTION_ADD; } kfree(mac_list); @@ -345,32 +757,80 @@ static void esw_vport_change_handler(struct work_struct *work) struct mlx5_vport *vport = container_of(work, struct mlx5_vport, vport_change_handler); struct mlx5_core_dev *dev = vport->dev; + struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", + vport->vport, mac); + + if (vport->enabled_events & UC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_UC); + } - if (!is_valid_ether_addr(mac)) - goto out; - - esw_update_vport_uc_list(dev->priv.eswitch, vport->vport); - esw_apply_vport_uc_list(dev->priv.eswitch, vport->vport); + if (vport->enabled_events & MC_ADDR_CHANGE) { + esw_update_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + esw_apply_vport_addr_list(esw, vport->vport, + MLX5_NVPRT_LIST_TYPE_MC); + } -out: + esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport); if (vport->enabled) arm_vport_context_events_cmd(dev, vport->vport, - UC_ADDR_CHANGE); + vport->enabled_events); } -static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num) +static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, + int enable_events) { struct mlx5_vport *vport = &esw->vports[vport_num]; unsigned long flags; + WARN_ON(vport->enabled); + + esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + + /* Sync with current vport context */ + vport->enabled_events = enable_events; + esw_vport_change_handler(&vport->vport_change_handler); + spin_lock_irqsave(&vport->lock, flags); vport->enabled = true; spin_unlock_irqrestore(&vport->lock, flags); - arm_vport_context_events_cmd(esw->dev, vport_num, UC_ADDR_CHANGE); + arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); + + esw->enabled_vports++; + esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); +} + +static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_node *tmp; + int hi; + + for_each_l2hash_node(node, tmp, vport->uc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); + + for_each_l2hash_node(node, tmp, vport->mc_list, hi) { + addr = container_of(node, struct vport_addr, node); + addr->action = MLX5_ACTION_DEL; + } + esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); } static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) @@ -381,19 +841,82 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) if (!vport->enabled) return; + esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); /* Mark this vport as disabled to discard new events */ spin_lock_irqsave(&vport->lock, flags); vport->enabled = false; + vport->enabled_events = 0; spin_unlock_irqrestore(&vport->lock, flags); + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); - /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); + /* We don't assume VFs will cleanup after themselves */ + esw_cleanup_vport(esw, vport_num); + esw->enabled_vports--; } /* Public E-Switch API */ +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +{ + int err; + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return 0; + + if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { + esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + return -ENOTSUPP; + } + + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + + esw_disable_vport(esw, 0); + + err = esw_create_fdb_table(esw, nvfs + 1); + if (err) + goto abort; + + for (i = 0; i <= nvfs; i++) + esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + + esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", + esw->enabled_vports); + return 0; + +abort: + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + return err; +} + +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +{ + int i; + + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", + esw->enabled_vports); + + for (i = 0; i < esw->total_vports; i++) + esw_disable_vport(esw, i); + + esw_destroy_fdb_table(esw); + + /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); @@ -439,7 +962,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } - esw->total_vports = total_vports; for (vport_num = 0; vport_num < total_vports; vport_num++) { struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -450,9 +972,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) spin_lock_init(&vport->lock); } - dev->priv.eswitch = esw; + esw->total_vports = total_vports; + esw->enabled_vports = 0; - esw_enable_vport(esw, 0); + dev->priv.eswitch = esw; + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); /* VF Vports will be enabled when SRIOV is enabled */ return 0; abort: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 0c41f26578243..f222e336f34f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -86,10 +86,25 @@ struct l2addr_node { kfree(ptr); \ }) +struct mlx5_flow_rule { + void *ft; + u32 fi; + u8 match_criteria_enable; + u32 *match_criteria; + u32 *match_value; + u32 action; + u32 flow_tag; + bool valid; + atomic_t refcount; + struct mutex mutex; /* protect flow rule updates */ + struct list_head dest_list; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; struct work_struct vport_change_handler; /* This spinlock protects access to vport data, between @@ -98,6 +113,7 @@ struct mlx5_vport { */ spinlock_t lock; /* vport events sync */ bool enabled; + u16 enabled_events; }; struct mlx5_l2_table { @@ -106,17 +122,26 @@ struct mlx5_l2_table { unsigned long *bitmap; }; +struct mlx5_eswitch_fdb { + void *fdb; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_l2_table l2_table; + struct mlx5_eswitch_fdb fdb_table; + struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; int total_vports; + int enabled_vports; }; /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1649d5cf9e29a..bee7da822dfe2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -36,6 +36,7 @@ #include #include #include +#include #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "3.0-1" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 19a43240e3592..7b24386794f99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -33,6 +33,9 @@ #include #include #include "mlx5_core.h" +#ifdef CONFIG_MLX5_CORE_EN +#include "eswitch.h" +#endif static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) { @@ -144,13 +147,15 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) mlx5_core_cleanup_vfs(dev); if (!num_vfs) { +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif kfree(sriov->vfs_ctx); sriov->vfs_ctx = NULL; if (!pci_vfs_assigned(pdev)) pci_disable_sriov(pdev); else pr_info("unloading PF driver while leaving orphan VFs\n"); - return 0; } @@ -161,6 +166,9 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) } mlx5_core_init_vfs(dev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); +#endif return num_vfs; } @@ -199,6 +207,10 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) sriov->enabled_vfs = cur_vfs; mlx5_core_init_vfs(dev, cur_vfs); +#ifdef CONFIG_MLX5_CORE_EN + if (cur_vfs) + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); +#endif enable_vfs(dev, cur_vfs); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index bce9caed1eed8..88eb4490a8b32 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1074,6 +1074,12 @@ enum { VPORT_STATE_UP = 0x1, }; +enum { + MLX5_ESW_VPORT_ADMIN_STATE_DOWN = 0x0, + MLX5_ESW_VPORT_ADMIN_STATE_UP = 0x1, + MLX5_ESW_VPORT_ADMIN_STATE_AUTO = 0x2, +}; + enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, diff --git a/include/linux/mlx5/flow_table.h b/include/linux/mlx5/flow_table.h index 5f922c6d4fc23..0f2a15cf33173 100644 --- a/include/linux/mlx5/flow_table.h +++ b/include/linux/mlx5/flow_table.h @@ -41,6 +41,15 @@ struct mlx5_flow_table_group { u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; }; +struct mlx5_flow_destination { + enum mlx5_flow_destination_type type; + union { + u32 tir_num; + void *ft; + u32 vport_num; + }; +}; + void *mlx5_create_flow_table(struct mlx5_core_dev *dev, u8 level, u8 table_type, u16 num_groups, struct mlx5_flow_table_group *group); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index ae7c08adba4a0..a81b008738fd2 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -827,9 +827,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_69[0x220]; }; -enum { - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_FLOW_TABLE_ = 0x1, - MLX5_DEST_FORMAT_STRUCT_DESTINATION_TYPE_TIR = 0x2, +enum mlx5_flow_destination_type { + MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0, + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1, + MLX5_FLOW_DESTINATION_TYPE_TIR = 0x2, }; struct mlx5_ifc_dest_format_struct_bits { From 77256579c6b43bb88da96b43637bafc0df20f8e8 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:21 +0200 Subject: [PATCH 14/18] net/mlx5: E-Switch, Introduce Vport administration functions Implement set VF mac/link state and query VF config to be used later in nedev VF ndos or any other management API. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 61 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 8 +++ 2 files changed, 69 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6fa6f013d2c5f..d7d6eba672a7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1022,3 +1022,64 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) queue_work(esw->work_queue, &vport->vport_change_handler); spin_unlock(&vport->lock); } + +/* Vport Administration */ +#define ESW_ALLOWED(esw) \ + (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) +#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) + +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]) +{ + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", + vport, err); + return err; + } + + return err; +} + +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state) +{ + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + return mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); +} + +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi) +{ + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + memset(ivi, 0, sizeof(*ivi)); + ivi->vf = vport - 1; + + mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); + ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport); + ivi->vlan = 0; + ivi->qos = 0; + ivi->spoofchk = 0; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f222e336f34f2..43a6730d76a3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -33,6 +33,8 @@ #ifndef __MLX5_ESWITCH_H__ #define __MLX5_ESWITCH_H__ +#include +#include #include #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -143,5 +145,11 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, + int vport, u8 mac[ETH_ALEN]); +int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, + int vport, int link_state); +int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, + int vport, struct ifla_vf_info *ivi); #endif /* __MLX5_ESWITCH_H__ */ From d6666753c6e85834f1669c7b831cc2b7fc9e4390 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:22 +0200 Subject: [PATCH 15/18] net/mlx5: E-Switch, Introduce HCA cap and E-Switch vport context E-Switch vport context is unlike NIC vport context, managed by the E-Switch manager or vport_group_manager and not by the NIC(VF) driver. The E-Switch manager can access (read/modify) any of its vports E-Switch context. Currently E-Switch vport context includes only clietnt and server vlan insertion and striping data (for later support of VST mode). Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 11 +++ include/linux/mlx5/device.h | 9 ++ include/linux/mlx5/mlx5_ifc.h | 90 ++++++++++++++++++++ 3 files changed, 110 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index bf6e3dfcef511..1c9f9a54a8733 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -173,6 +173,17 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, vport_group_manager)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_CUR); + if (err) + return err; + err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH, + HCA_CAP_OPMOD_GET_MAX); + if (err) + return err; + } + return 0; } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 88eb4490a8b32..7d3a85faefb73 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1145,6 +1145,7 @@ enum mlx5_cap_type { MLX5_CAP_EOIB_OFFLOADS, MLX5_CAP_FLOW_TABLE, MLX5_CAP_ESWITCH_FLOW_TABLE, + MLX5_CAP_ESWITCH, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1196,6 +1197,14 @@ enum mlx5_cap_type { #define MLX5_CAP_ESW_FLOWTABLE_FDB_MAX(mdev, cap) \ MLX5_CAP_ESW_FLOWTABLE_MAX(mdev, flow_table_properties_nic_esw_fdb.cap) +#define MLX5_CAP_ESW(mdev, cap) \ + MLX5_GET(e_switch_cap, \ + mdev->hca_caps_cur[MLX5_CAP_ESWITCH], cap) + +#define MLX5_CAP_ESW_MAX(mdev, cap) \ + MLX5_GET(e_switch_cap, \ + mdev->hca_caps_max[MLX5_CAP_ESWITCH], cap) + #define MLX5_CAP_ODP(mdev, cap)\ MLX5_GET(odp_cap, mdev->hca_caps_cur[MLX5_CAP_ODP], cap) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index a81b008738fd2..f5d94495758a6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -459,6 +459,17 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits { u8 reserved_1[0x7800]; }; +struct mlx5_ifc_e_switch_cap_bits { + u8 vport_svlan_strip[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert_if_not_exist[0x1]; + u8 vport_cvlan_insert_overwrite[0x1]; + u8 reserved_0[0x1b]; + + u8 reserved_1[0x7e0]; +}; + struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 csum_cap[0x1]; u8 vlan_cap[0x1]; @@ -1860,6 +1871,7 @@ union mlx5_ifc_hca_cap_union_bits { struct mlx5_ifc_per_protocol_networking_offload_caps_bits per_protocol_networking_offload_caps; struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap; struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap; + struct mlx5_ifc_e_switch_cap_bits e_switch_cap; u8 reserved_0[0x8000]; }; @@ -2305,6 +2317,26 @@ struct mlx5_ifc_hca_vport_context_bits { u8 reserved_6[0xca0]; }; +struct mlx5_ifc_esw_vport_context_bits { + u8 reserved_0[0x3]; + u8 vport_svlan_strip[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_insert[0x2]; + u8 reserved_1[0x18]; + + u8 reserved_2[0x20]; + + u8 svlan_cfi[0x1]; + u8 svlan_pcp[0x3]; + u8 svlan_id[0xc]; + u8 cvlan_cfi[0x1]; + u8 cvlan_pcp[0x3]; + u8 cvlan_id[0xc]; + + u8 reserved_3[0x7a0]; +}; + enum { MLX5_EQC_STATUS_OK = 0x0, MLX5_EQC_STATUS_EQ_WRITE_FAILURE = 0xa, @@ -3743,6 +3775,64 @@ struct mlx5_ifc_query_flow_group_in_bits { u8 reserved_5[0x120]; }; +struct mlx5_ifc_query_esw_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; + + struct mlx5_ifc_esw_vport_context_bits esw_vport_context; +}; + +struct mlx5_ifc_query_esw_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + u8 reserved_3[0x20]; +}; + +struct mlx5_ifc_modify_esw_vport_context_out_bits { + u8 status[0x8]; + u8 reserved_0[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_1[0x40]; +}; + +struct mlx5_ifc_esw_vport_context_fields_select_bits { + u8 reserved[0x1c]; + u8 vport_cvlan_insert[0x1]; + u8 vport_svlan_insert[0x1]; + u8 vport_cvlan_strip[0x1]; + u8 vport_svlan_strip[0x1]; +}; + +struct mlx5_ifc_modify_esw_vport_context_in_bits { + u8 opcode[0x10]; + u8 reserved_0[0x10]; + + u8 reserved_1[0x10]; + u8 op_mod[0x10]; + + u8 other_vport[0x1]; + u8 reserved_2[0xf]; + u8 vport_number[0x10]; + + struct mlx5_ifc_esw_vport_context_fields_select_bits field_select; + + struct mlx5_ifc_esw_vport_context_bits esw_vport_context; +}; + struct mlx5_ifc_query_eq_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; From 9e7ea3524a3f819911ffa7f471b78223757e824e Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:23 +0200 Subject: [PATCH 16/18] net/mlx5: E-Switch, Introduce set vport vlan (VST mode) Add query and modify functions to control client vlan and qos striping or insertion, in E-Switch vports contexts. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 134 +++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 2 + 2 files changed, 134 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d7d6eba672a7a..8fb66c0750141 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -128,6 +128,116 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, return err; } +/* E-Switch vport context HW commands */ +static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + + MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); + + return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); +} + +static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 *vlan, u8 *qos) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; + int err; + bool cvlan_strip; + bool cvlan_insert; + + memset(out, 0, sizeof(out)); + + *vlan = 0; + *qos = 0; + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); + if (err) + goto out; + + cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_strip); + + cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.vport_cvlan_insert); + + if (cvlan_strip || cvlan_insert) { + *vlan = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_id); + *qos = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.cvlan_pcp); + } + + esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", + vport, *vlan, *qos); +out: + return err; +} + +static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; + + memset(out, 0, sizeof(out)); + + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + + MLX5_SET(modify_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); + + return mlx5_cmd_exec_check_status(dev, in, inlen, + out, sizeof(out)); +} + +static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, + u16 vlan, u8 qos, bool set) +{ + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; + + memset(in, 0, sizeof(in)); + + if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || + !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) + return -ENOTSUPP; + + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", + vport, vlan, qos, set); + + if (set) { + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_strip, 1); + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_pcp, qos); + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.cvlan_id, vlan); + } + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_strip, 1); + MLX5_SET(modify_esw_vport_context_in, in, + field_select.vport_cvlan_insert, 1); + + return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); +} + /* HW L2 Table (MPFS) management */ static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac, u8 vlan_valid, u16 vlan) @@ -1065,6 +1175,9 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi) { + u16 vlan; + u8 qos; + if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) @@ -1077,9 +1190,26 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, vport); - ivi->vlan = 0; - ivi->qos = 0; + query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); + ivi->vlan = vlan; + ivi->qos = qos; ivi->spoofchk = 0; return 0; } + +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + int set = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) + return -EINVAL; + + if (vlan || qos) + set = 1; + + return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 43a6730d76a3b..f85d5b86ef412 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -149,6 +149,8 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int vport, int link_state); +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); From 3b751a2a418aa58c5bd3b23bf97d169cc4c63819 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:24 +0200 Subject: [PATCH 17/18] net/mlx5: E-Switch, Introduce get vf statistics Add support to get VF statistics using query vport counter command. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 67 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 + 2 files changed, 70 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8fb66c0750141..d8939e597c546 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1213,3 +1213,70 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); } + +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + int err = 0; + u32 *out; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_vport_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_VPORT_COUNTER); + MLX5_SET(query_vport_counter_in, in, op_mod, 0); + MLX5_SET(query_vport_counter_in, in, vport_number, vport); + if (vport) + MLX5_SET(query_vport_counter_in, in, other_vport, 1); + + memset(out, 0, outlen); + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) + goto free_out; + + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) + + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = + MLX5_GET_CTR(out, received_eth_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); + +free_out: + kvfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index f85d5b86ef412..02ff3eade0260 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -153,5 +153,8 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int vport, u16 vlan, u8 qos); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); +int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, + int vport, + struct ifla_vf_stats *vf_stats); #endif /* __MLX5_ESWITCH_H__ */ From 66e49dedada6e5badbb01e19e7ae511172c5ac7d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 1 Dec 2015 18:03:25 +0200 Subject: [PATCH 18/18] net/mlx5e: Add support for SR-IOV ndos Implement and enable SR-IOV ndos to manage SR-IOV configuration via netdev netlink API. Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 007e464b3e585..d67058afe87ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -32,6 +32,7 @@ #include #include "en.h" +#include "eswitch.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; @@ -1931,6 +1932,79 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) return err; } +static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); +} + +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, + vlan, qos); +} + +static int mlx5_vport_link2ifla(u8 esw_link) +{ + switch (esw_link) { + case MLX5_ESW_VPORT_ADMIN_STATE_DOWN: + return IFLA_VF_LINK_STATE_DISABLE; + case MLX5_ESW_VPORT_ADMIN_STATE_UP: + return IFLA_VF_LINK_STATE_ENABLE; + } + return IFLA_VF_LINK_STATE_AUTO; +} + +static int mlx5_ifla_link2vport(u8 ifla_link) +{ + switch (ifla_link) { + case IFLA_VF_LINK_STATE_DISABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_DOWN; + case IFLA_VF_LINK_STATE_ENABLE: + return MLX5_ESW_VPORT_ADMIN_STATE_UP; + } + return MLX5_ESW_VPORT_ADMIN_STATE_AUTO; +} + +static int mlx5e_set_vf_link_state(struct net_device *dev, int vf, + int link_state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, + mlx5_ifla_link2vport(link_state)); +} + +static int mlx5e_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); + if (err) + return err; + ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate); + return 0; +} + +static int mlx5e_get_vf_stats(struct net_device *dev, + int vf, struct ifla_vf_stats *vf_stats) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, + vf_stats); +} + static struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -1941,7 +2015,7 @@ static struct net_device_ops mlx5e_netdev_ops = { .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, + .ndo_change_mtu = mlx5e_change_mtu }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2041,6 +2115,14 @@ static void mlx5e_build_netdev(struct net_device *netdev) if (priv->params.num_tc > 1) mlx5e_netdev_ops.ndo_select_queue = mlx5e_select_queue; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5e_netdev_ops.ndo_set_vf_mac = mlx5e_set_vf_mac; + mlx5e_netdev_ops.ndo_set_vf_vlan = mlx5e_set_vf_vlan; + mlx5e_netdev_ops.ndo_get_vf_config = mlx5e_get_vf_config; + mlx5e_netdev_ops.ndo_set_vf_link_state = mlx5e_set_vf_link_state; + mlx5e_netdev_ops.ndo_get_vf_stats = mlx5e_get_vf_stats; + } + netdev->netdev_ops = &mlx5e_netdev_ops; netdev->watchdog_timeo = 15 * HZ;