From 1c4c769cdf682c63d3b10cb241f4a96ebad2f215 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 30 May 2023 11:59:34 +0300 Subject: [PATCH 1/5] net/mlx5: Remove rmap also in case dynamic MSIX not supported mlx5 adds IRQs to rmap upon MSIX request, and mlx5 removes rmap from MSIX only if msi_map.index is populated. However, msi_map.index is populated only when dynamic MSIX is supported. This results in freeing IRQs without removing them from rmap, which triggers the below WARN_ON[1]. rmap is a feature which has no relation to dynamic MSIX. Hence, remove the check of msi_map.index when removing IRQ from rmap. [1] [ 200.307160 ] WARNING: CPU: 20 PID: 1702 at kernel/irq/manage.c:2034 free_irq+0x2ac/0x358 [ 200.316990 ] CPU: 20 PID: 1702 Comm: modprobe Not tainted 6.4.0-rc3_for_upstream_min_debug_2023_05_24_14_02 #1 [ 200.318939 ] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 200.321659 ] pc : free_irq+0x2ac/0x358 [ 200.322400 ] lr : free_irq+0x20/0x358 [ 200.337865 ] Call trace: [ 200.338360 ] free_irq+0x2ac/0x358 [ 200.339029 ] irq_release+0x58/0xd0 [mlx5_core] [ 200.340093 ] mlx5_irqs_release_vectors+0x80/0xb0 [mlx5_core] [ 200.341344 ] destroy_comp_eqs+0x120/0x170 [mlx5_core] [ 200.342469 ] mlx5_eq_table_destroy+0x1c/0x38 [mlx5_core] [ 200.343645 ] mlx5_unload+0x8c/0xc8 [mlx5_core] [ 200.344652 ] mlx5_uninit_one+0x78/0x118 [mlx5_core] [ 200.345745 ] remove_one+0x80/0x108 [mlx5_core] [ 200.346752 ] pci_device_remove+0x40/0xd8 [ 200.347554 ] device_remove+0x50/0x88 [ 200.348272 ] device_release_driver_internal+0x1c4/0x228 [ 200.349312 ] driver_detach+0x54/0xa0 [ 200.350030 ] bus_remove_driver+0x74/0x100 [ 200.350833 ] driver_unregister+0x34/0x68 [ 200.351619 ] pci_unregister_driver+0x28/0xa0 [ 200.352476 ] mlx5_cleanup+0x14/0x2210 [mlx5_core] [ 200.353536 ] __arm64_sys_delete_module+0x190/0x2e8 [ 200.354495 ] el0_svc_common.constprop.0+0x6c/0x1d0 [ 200.355455 ] do_el0_svc+0x38/0x98 [ 200.356122 ] el0_svc+0x1c/0x80 [ 200.356739 ] 
el0t_64_sync_handler+0xb4/0x130 [ 200.357604 ] el0t_64_sync+0x174/0x178 [ 200.358345 ] ---[ end trace 0000000000000000 ]--- Fixes: 3354822cde5a ("net/mlx5: Use dynamic msix vectors allocation") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index db5687d9fec97..86ac4a85fd878 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -141,7 +141,7 @@ static void irq_release(struct mlx5_irq *irq) irq_update_affinity_hint(irq->map.virq, NULL); #ifdef CONFIG_RFS_ACCEL rmap = mlx5_eq_table_get_rmap(pool->dev); - if (rmap && irq->map.index) + if (rmap) irq_cpu_rmap_remove(rmap, irq->map.virq); #endif From 8764bd0fa5d402c51b136f6aeaba20fc16961ba1 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Wed, 31 May 2023 10:48:56 +0200 Subject: [PATCH 2/5] net/mlx5: Fix setting of irq->map.index for static IRQ case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dynamic IRQ allocation is not supported, all IRQs are allocated up front in mlx5_irq_table_create() instead of dynamically as part of mlx5_irq_alloc(). In the latter dynamic case, irq->map.index is set via the mapping returned by pci_msix_alloc_irq_at(). In the static case and prior to commit 1da438c0ae02 ("net/mlx5: Fix indexing of mlx5_irq"), irq->map.index was set in mlx5_irq_alloc() twice: once initially to 0 and then to the requested index before storing in the xarray. After this commit it is only set to 0, which breaks all other IRQ mappings. Fix this by setting irq->map.index to the requested index together with irq->map.virq and improve the related comment to make it clearer which cases it deals with. 
Cc: Chuck Lever III Tested-by: Mark Brown Reviewed-by: Mark Brown Reviewed-by: Simon Horman Reviewed-by: Eli Cohen Fixes: 1da438c0ae02 ("net/mlx5: Fix indexing of mlx5_irq") Signed-off-by: Niklas Schnelle Tested-by: Cédric Le Goater Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 86ac4a85fd878..38edd485ba6f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -232,12 +232,13 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i, if (!irq) return ERR_PTR(-ENOMEM); if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) { - /* The vector at index 0 was already allocated. - * Just get the irq number. If dynamic irq is not supported - * vectors have also been allocated. + /* The vector at index 0 is always statically allocated. If + * dynamic irq is not supported all vectors are statically + * allocated. In both cases just get the irq number and set + * the index. 
*/ irq->map.virq = pci_irq_vector(dev->pdev, i); - irq->map.index = 0; + irq->map.index = i; } else { irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc); if (!irq->map.virq) { From 368591995d010e639ad8f28b27f1b721f0872342 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 31 May 2023 15:48:25 -0400 Subject: [PATCH 3/5] net/mlx5: Ensure af_desc.mask is properly initialized [ 9.837087] mlx5_core 0000:02:00.0: firmware version: 16.35.2000 [ 9.843126] mlx5_core 0000:02:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link) [ 10.311515] mlx5_core 0000:02:00.0: Rate limit: 127 rates are supported, range: 0Mbps to 97656Mbps [ 10.321948] mlx5_core 0000:02:00.0: E-Switch: Total vports 2, per vport: max uc(128) max mc(2048) [ 10.344324] mlx5_core 0000:02:00.0: mlx5_pcie_event:301:(pid 88): PCIe slot advertised sufficient power (27W). [ 10.354339] BUG: unable to handle page fault for address: ffffffff8ff0ade0 [ 10.361206] #PF: supervisor read access in kernel mode [ 10.366335] #PF: error_code(0x0000) - not-present page [ 10.371467] PGD 81ec39067 P4D 81ec39067 PUD 81ec3a063 PMD 114b07063 PTE 800ffff7e10f5062 [ 10.379544] Oops: 0000 [#1] PREEMPT SMP PTI [ 10.383721] CPU: 0 PID: 117 Comm: kworker/0:6 Not tainted 6.3.0-13028-g7222f123c983 #1 [ 10.391625] Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.0b 06/12/2017 [ 10.398750] Workqueue: events work_for_cpu_fn [ 10.403108] RIP: 0010:__bitmap_or+0x10/0x26 [ 10.407286] Code: 85 c0 0f 95 c0 c3 cc cc cc cc 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 89 c9 31 c0 48 83 c1 3f 48 c1 e9 06 39 c> [ 10.426024] RSP: 0000:ffffb45a0078f7b0 EFLAGS: 00010097 [ 10.431240] RAX: 0000000000000000 RBX: ffffffff8ff0adc0 RCX: 0000000000000004 [ 10.438365] RDX: ffff9156801967d0 RSI: ffffffff8ff0ade0 RDI: ffff9156801967b0 [ 10.445489] RBP: ffffb45a0078f7e8 R08: 0000000000000030 R09: 0000000000000000 [ 10.452613] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000000ec [ 10.459737] R13: ffffffff8ff0ade0 
R14: 0000000000000001 R15: 0000000000000020 [ 10.466862] FS: 0000000000000000(0000) GS:ffff9165bfc00000(0000) knlGS:0000000000000000 [ 10.474936] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 10.480674] CR2: ffffffff8ff0ade0 CR3: 00000001011ae003 CR4: 00000000003706f0 [ 10.487800] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 10.494922] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 10.502046] Call Trace: [ 10.504493] [ 10.506589] ? matrix_alloc_area.constprop.0+0x43/0x9a [ 10.511729] ? prepare_namespace+0x84/0x174 [ 10.515914] irq_matrix_reserve_managed+0x56/0x10c [ 10.520699] x86_vector_alloc_irqs+0x1d2/0x31e [ 10.525146] irq_domain_alloc_irqs_hierarchy+0x39/0x3f [ 10.530284] irq_domain_alloc_irqs_parent+0x1a/0x2a [ 10.535155] intel_irq_remapping_alloc+0x59/0x5e9 [ 10.539859] ? kmem_cache_debug_flags+0x11/0x26 [ 10.544383] ? __radix_tree_lookup+0x39/0xb9 [ 10.548649] irq_domain_alloc_irqs_hierarchy+0x39/0x3f [ 10.553779] irq_domain_alloc_irqs_parent+0x1a/0x2a [ 10.558650] msi_domain_alloc+0x8c/0x120 [ 10.567697] irq_domain_alloc_irqs_locked+0x11d/0x286 [ 10.572741] __irq_domain_alloc_irqs+0x72/0x93 [ 10.577179] __msi_domain_alloc_irqs+0x193/0x3f1 [ 10.581789] ? __xa_alloc+0xcf/0xe2 [ 10.585273] msi_domain_alloc_irq_at+0xa8/0xfe [ 10.589711] pci_msix_alloc_irq_at+0x47/0x5c The crash is due to matrix_alloc_area() attempting to access per-CPU memory for CPUs that are not present on the system. The CPU mask passed into reserve_managed_vector() via its @irqd parameter is corrupted because it contains uninitialized stack data. 
Fixes: bbac70c74183 ("net/mlx5: Use newer affinity descriptor") Reviewed-by: Thomas Gleixner Signed-off-by: Chuck Lever Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 38edd485ba6f3..843da89a90350 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -571,11 +571,11 @@ int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, af_desc.is_managed = false; for (i = 0; i < nirqs; i++) { + cpumask_clear(&af_desc.mask); cpumask_set_cpu(cpus[i], &af_desc.mask); irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap); if (IS_ERR(irq)) break; - cpumask_clear(&af_desc.mask); irqs[i] = irq; } From b6193d7030e3c59f1d4c75648c9c8fa40cad2bcd Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 27 May 2023 23:07:08 -0700 Subject: [PATCH 4/5] net/mlx5e: Fix error handling in mlx5e_refresh_tirs Allocation failure is outside the critical lock section and should return immediately rather than jumping to the unlock section. Also unlock as soon as required and remove the now redundant jump label. 
Fixes: 80a2a9026b24 ("net/mlx5e: Add a lock on tir list") Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 1f90594499c60..41c396e764579 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -150,10 +150,8 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto out; - } + if (!in) + return -ENOMEM; if (enable_uc_lb) lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; @@ -171,14 +169,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, tirn = tir->tirn; err = mlx5_core_modify_tir(mdev, tirn, in); if (err) - goto out; + break; } + mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); -out: kvfree(in); if (err) netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); - mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); return err; } From bbfa4b58997e3d38ba629c9f6fc0bd1c163aaf43 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Fri, 28 Apr 2023 13:48:13 +0300 Subject: [PATCH 5/5] net/mlx5: Read embedded cpu after init bit cleared During driver load, the driver reads the embedded_cpu bit from the initialization segment, but the initialization segment is readable only after the initialization bit is cleared. Move the call to mlx5_read_embedded_cpu() to right after the initialization bit is cleared. 
Signed-off-by: Moshe Shemesh Fixes: 591905ba9679 ("net/mlx5: Introduce Mellanox SmartNIC and modify page management logic") Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2132a65106391..d6ee016deae17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -923,7 +923,6 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, } mlx5_pci_vsc_init(dev); - dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); return 0; err_clr_master: @@ -1155,6 +1154,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout goto err_cmd_cleanup; } + dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); mlx5_start_health_poll(dev);