From 99f201a9a7d59d95da75695c41b3baf72c39efd3 Mon Sep 17 00:00:00 2001 From: Yedidya Benshimol Date: Tue, 1 Apr 2025 06:45:54 +0300 Subject: [PATCH 01/81] wifi: iwlwifi: mld: reduce scope for uninitialized variable After resuming from D3, keeping the connection or disconnecting isn't relevant for the case of netdetect. Reduce the scope of the keep_connection indicator to wowlan only. Fixes: d1e879ec600f9 ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Yedidya Benshimol Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250401064530.769f76a9ad6e.I69e8f194997eb3a20e40d27fdc31002d5753d905@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/d3.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index 5a7207accd86..2c6e8ecd93b7 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1895,7 +1895,6 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) int link_id; int ret; bool fw_err = false; - bool keep_connection; lockdep_assert_wiphy(mld->wiphy); @@ -1965,7 +1964,7 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) iwl_mld_process_netdetect_res(mld, bss_vif, &resume_data); mld->netdetect = false; } else { - keep_connection = + bool keep_connection = iwl_mld_process_wowlan_status(mld, bss_vif, resume_data.wowlan_status); @@ -1973,11 +1972,10 @@ int iwl_mld_wowlan_resume(struct iwl_mld *mld) if (keep_connection) iwl_mld_unblock_emlsr(mld, bss_vif, IWL_MLD_EMLSR_BLOCKED_WOWLAN); + else + ieee80211_resume_disconnect(bss_vif); } - if (!mld->netdetect && !keep_connection) - ieee80211_resume_disconnect(bss_vif); - goto out; err: From 676b902db4766aaec049d6ca4800dfa093599005 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 1 Apr 2025 06:45:55 +0300 Subject: [PATCH 02/81] wifi: iwlwifi: mld: fix PM_SLEEP -Wundef warning Config symbols are not defined if turned off, so need to use #ifdef, not #if. Fixes: d1e879ec600f9 ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Johannes Berg Reviewed-by: Yedidya Ben Shimol Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20250401064530.612020bcdaad.I4e885e6646576e29fb236250a1b5038d3f14b08e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/iface.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/iface.h b/drivers/net/wireless/intel/iwlwifi/mld/iface.h index d1d56b081bf6..ec14d0736cee 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/iface.h +++ b/drivers/net/wireless/intel/iwlwifi/mld/iface.h @@ -166,7 +166,7 @@ struct iwl_mld_vif { struct iwl_mld_emlsr emlsr; -#if CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP struct iwl_mld_wowlan_data wowlan_data; #endif #ifdef CONFIG_IWLWIFI_DEBUGFS From 44605365f93518eacfc500665d965e88db4791c2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Mar 2025 09:43:30 +0100 Subject: [PATCH 03/81] iwlwifi: mld: fix building with CONFIG_PM_SLEEP disabled The newly added driver causes multiple build problems when CONFIG_PM_SLEEP is disabled: drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:1982:12: error: 'iwl_mld_resume' defined but not used [-Werror=unused-function] 1982 | static int iwl_mld_resume(struct ieee80211_hw *hw) | ^~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:1960:1: error: 'iwl_mld_suspend' defined but not used [-Werror=unused-function] 1960 | iwl_mld_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) | ^~~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:1946:13: error: 'iwl_mld_set_wakeup' defined but not used [-Werror=unused-function] 1946 | static void iwl_mld_set_wakeup(struct ieee80211_hw *hw, bool enabled) | ^~~~~~~~~~~~~~~~~~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c: In function 'iwl_mld_mac80211_start': drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:504:20: error: 'ret' is used uninitialized [-Werror=uninitialized] 504 | if (!in_d3 || ret) { | ^~ drivers/net/wireless/intel/iwlwifi/mld/mac80211.c:478:13: note: 'ret' was declared here 478 | int ret; | ^~~ Hide the unused functions and make sure the 'ret' variable is not used before the initialization. Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20250325084340.378724-1-arnd@kernel.org Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 6851064b82da..0b5bc5abb82d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -501,7 +501,7 @@ int iwl_mld_mac80211_start(struct ieee80211_hw *hw) iwl_mld_restart_cleanup(mld); } - if (!in_d3 || ret) { + if (!in_d3) { ret = iwl_mld_start_fw(mld); if (ret) goto error; @@ -537,7 +537,8 @@ void iwl_mld_mac80211_stop(struct ieee80211_hw *hw, bool suspend) /* if the suspend flow fails the fw is in error. Stop it here, and it * will be started upon wakeup */ - if (!suspend || iwl_mld_no_wowlan_suspend(mld)) + if (!suspend || + (IS_ENABLED(CONFIG_PM_SLEEP) && iwl_mld_no_wowlan_suspend(mld))) iwl_mld_stop_fw(mld); /* HW is stopped, no more coming RX. OTOH, the worker can't run as the @@ -1943,6 +1944,7 @@ static void iwl_mld_sta_rc_update(struct ieee80211_hw *hw, } } +#ifdef CONFIG_PM_SLEEP static void iwl_mld_set_wakeup(struct ieee80211_hw *hw, bool enabled) { struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); @@ -1994,6 +1996,7 @@ static int iwl_mld_resume(struct ieee80211_hw *hw) return 0; } +#endif static int iwl_mld_alloc_ptk_pn(struct iwl_mld *mld, struct iwl_mld_sta *mld_sta, From 9bb8deae8aec180f8127708ee484c3eedab2b86f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 29 Mar 2025 22:01:34 +0100 Subject: [PATCH 04/81] wifi: add wireless list to MAINTAINERS Add the wireless list to MAINTAINERS entries for all those drivers that are no longer covered by the entry now. Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20250329220135.8bfaffbad97d.I946354c2395f4a30b8c435857a92553b1b58df5b@changeid Signed-off-by: Johannes Berg --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1cd25139cc58..cbbc50980690 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6239,6 +6239,7 @@ F: Documentation/process/cve.rst CW1200 WLAN driver S: Orphan +L: linux-wireless@vger.kernel.org F: drivers/net/wireless/st/ F: include/linux/platform_data/net-cw1200.h @@ -14069,6 +14070,7 @@ S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER +L: linux-wireless@vger.kernel.org L: libertas-dev@lists.infradead.org S: Orphan F: drivers/net/wireless/marvell/libertas/ @@ -19470,6 +19472,7 @@ F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath12k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath12k @@ -19479,6 +19482,7 @@ N: ath12k QUALCOMM ATHEROS ATH10K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath10k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k @@ -19488,6 +19492,7 @@ N: ath10k QUALCOMM ATHEROS ATH11K WIRELESS DRIVER M: Jeff Johnson +L: linux-wireless@vger.kernel.org L: ath11k@lists.infradead.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k @@ -21832,6 +21837,7 @@ F: drivers/platform/x86/touchscreen_dmi.c SILICON LABS WIRELESS DRIVERS (for WFxxx series) M: Jérôme Pouiller +L: linux-wireless@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml F: drivers/net/wireless/silabs/ From 27c7e63b3cb1a20bb78ed4a36c561ea4579fd7da Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Mar 2025 16:01:10 +0530 Subject: [PATCH 05/81] wifi: at76c50x: fix use after free access in at76_disconnect The memory pointed to by priv is freed at the end of at76_delete_device function (using ieee80211_free_hw). But the code then accesses the udev field of the freed object to put the USB device. This may also lead to a memory leak of the usb device. Fix this by using udev from interface. Fixes: 29e20aa6c6af ("at76c50x-usb: fix use after free on failure path in at76_probe()") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20250330103110.44080-1-abdun.nihaal@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/atmel/at76c50x-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/atmel/at76c50x-usb.c b/drivers/net/wireless/atmel/at76c50x-usb.c index 504e05ea30f2..97ea7ab0f491 100644 --- a/drivers/net/wireless/atmel/at76c50x-usb.c +++ b/drivers/net/wireless/atmel/at76c50x-usb.c @@ -2552,7 +2552,7 @@ static void at76_disconnect(struct usb_interface *interface) wiphy_info(priv->hw->wiphy, "disconnecting\n"); at76_delete_device(priv); - usb_put_dev(priv->udev); + usb_put_dev(interface_to_usbdev(interface)); dev_info(&interface->dev, "disconnected\n"); } From a104042e2bf6528199adb6ca901efe7b60c2c27f Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Mon, 24 Mar 2025 17:28:20 +0100 Subject: [PATCH 06/81] wifi: mac80211: Update skb's control block key in ieee80211_tx_dequeue() The ieee80211 skb control block key (set when skb was queued) could have been removed before ieee80211_tx_dequeue() call. ieee80211_tx_dequeue() already called ieee80211_tx_h_select_key() to get the current key, but the latter do not update the key in skb control block in case it is NULL. Because some drivers actually use this key in their TX callbacks (e.g. ath1{1,2}k_mac_op_tx()) this could lead to the use after free below: BUG: KASAN: slab-use-after-free in ath11k_mac_op_tx+0x590/0x61c Read of size 4 at addr ffffff803083c248 by task kworker/u16:4/1440 CPU: 3 UID: 0 PID: 1440 Comm: kworker/u16:4 Not tainted 6.13.0-ge128f627f404 #2 Hardware name: HW (DT) Workqueue: bat_events batadv_send_outstanding_bcast_packet Call trace: show_stack+0x14/0x1c (C) dump_stack_lvl+0x58/0x74 print_report+0x164/0x4c0 kasan_report+0xac/0xe8 __asan_report_load4_noabort+0x1c/0x24 ath11k_mac_op_tx+0x590/0x61c ieee80211_handle_wake_tx_queue+0x12c/0x1c8 ieee80211_queue_skb+0xdcc/0x1b4c ieee80211_tx+0x1ec/0x2bc ieee80211_xmit+0x224/0x324 __ieee80211_subif_start_xmit+0x85c/0xcf8 ieee80211_subif_start_xmit+0xc0/0xec4 dev_hard_start_xmit+0xf4/0x28c __dev_queue_xmit+0x6ac/0x318c batadv_send_skb_packet+0x38c/0x4b0 batadv_send_outstanding_bcast_packet+0x110/0x328 process_one_work+0x578/0xc10 worker_thread+0x4bc/0xc7c kthread+0x2f8/0x380 ret_from_fork+0x10/0x20 Allocated by task 1906: kasan_save_stack+0x28/0x4c kasan_save_track+0x1c/0x40 kasan_save_alloc_info+0x3c/0x4c __kasan_kmalloc+0xac/0xb0 __kmalloc_noprof+0x1b4/0x380 ieee80211_key_alloc+0x3c/0xb64 ieee80211_add_key+0x1b4/0x71c nl80211_new_key+0x2b4/0x5d8 genl_family_rcv_msg_doit+0x198/0x240 <...> Freed by task 1494: kasan_save_stack+0x28/0x4c kasan_save_track+0x1c/0x40 kasan_save_free_info+0x48/0x94 __kasan_slab_free+0x48/0x60 kfree+0xc8/0x31c kfree_sensitive+0x70/0x80 ieee80211_key_free_common+0x10c/0x174 ieee80211_free_keys+0x188/0x46c ieee80211_stop_mesh+0x70/0x2cc ieee80211_leave_mesh+0x1c/0x60 cfg80211_leave_mesh+0xe0/0x280 cfg80211_leave+0x1e0/0x244 <...> Reset SKB control block key before calling ieee80211_tx_h_select_key() to avoid that. Fixes: bb42f2d13ffc ("mac80211: Move reorder-sensitive TX handlers to after TXQ dequeue") Signed-off-by: Remi Pommarel Link: https://patch.msgid.link/06aa507b853ca385ceded81c18b0a6dd0f081bc8.1742833382.git.repk@triplefau.lt Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 20179db88c4a..34f229a6eab0 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3894,6 +3894,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * The key can be removed while the packet was queued, so need to call * this here to get the current key. */ + info->control.hw_key = NULL; r = ieee80211_tx_h_select_key(&tx); if (r != TX_CONTINUE) { ieee80211_free_txskb(&local->hw, skb); From 378677eb8f44621ecc9ce659f7af61e5baa94d81 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Mon, 24 Mar 2025 17:28:21 +0100 Subject: [PATCH 07/81] wifi: mac80211: Purge vif txq in ieee80211_do_stop() After ieee80211_do_stop() SKB from vif's txq could still be processed. Indeed another concurrent vif schedule_and_wake_txq call could cause those packets to be dequeued (see ieee80211_handle_wake_tx_queue()) without checking the sdata current state. Because vif.drv_priv is now cleared in this function, this could lead to driver crash. For example in ath12k, ahvif is store in vif.drv_priv. Thus if ath12k_mac_op_tx() is called after ieee80211_do_stop(), ahvif->ah can be NULL, leading the ath12k_warn(ahvif->ah,...) call in this function to trigger the NULL deref below. Unable to handle kernel paging request at virtual address dfffffc000000001 KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] batman_adv: bat0: Interface deactivated: brbh1337 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [dfffffc000000001] address between user and kernel address ranges Internal error: Oops: 0000000096000004 [#1] SMP CPU: 1 UID: 0 PID: 978 Comm: lbd Not tainted 6.13.0-g633f875b8f1e #114 Hardware name: HW (DT) pstate: 10000005 (nzcV daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : ath12k_mac_op_tx+0x6cc/0x29b8 [ath12k] lr : ath12k_mac_op_tx+0x174/0x29b8 [ath12k] sp : ffffffc086ace450 x29: ffffffc086ace450 x28: 0000000000000000 x27: 1ffffff810d59ca4 x26: ffffff801d05f7c0 x25: 0000000000000000 x24: 000000004000001e x23: ffffff8009ce4926 x22: ffffff801f9c0800 x21: ffffff801d05f7f0 x20: ffffff8034a19f40 x19: 0000000000000000 x18: ffffff801f9c0958 x17: ffffff800bc0a504 x16: dfffffc000000000 x15: ffffffc086ace4f8 x14: ffffff801d05f83c x13: 0000000000000000 x12: ffffffb003a0bf03 x11: 0000000000000000 x10: ffffffb003a0bf02 x9 : ffffff8034a19f40 x8 : ffffff801d05f818 x7 : 1ffffff0069433dc x6 : ffffff8034a19ee0 x5 : ffffff801d05f7f0 x4 : 0000000000000000 x3 : 0000000000000001 x2 : 0000000000000000 x1 : dfffffc000000000 x0 : 0000000000000008 Call trace: ath12k_mac_op_tx+0x6cc/0x29b8 [ath12k] (P) ieee80211_handle_wake_tx_queue+0x16c/0x260 ieee80211_queue_skb+0xeec/0x1d20 ieee80211_tx+0x200/0x2c8 ieee80211_xmit+0x22c/0x338 __ieee80211_subif_start_xmit+0x7e8/0xc60 ieee80211_subif_start_xmit+0xc4/0xee0 __ieee80211_subif_start_xmit_8023.isra.0+0x854/0x17a0 ieee80211_subif_start_xmit_8023+0x124/0x488 dev_hard_start_xmit+0x160/0x5a8 __dev_queue_xmit+0x6f8/0x3120 br_dev_queue_push_xmit+0x120/0x4a8 __br_forward+0xe4/0x2b0 deliver_clone+0x5c/0xd0 br_flood+0x398/0x580 br_dev_xmit+0x454/0x9f8 dev_hard_start_xmit+0x160/0x5a8 __dev_queue_xmit+0x6f8/0x3120 ip6_finish_output2+0xc28/0x1b60 __ip6_finish_output+0x38c/0x638 ip6_output+0x1b4/0x338 ip6_local_out+0x7c/0xa8 ip6_send_skb+0x7c/0x1b0 ip6_push_pending_frames+0x94/0xd0 rawv6_sendmsg+0x1a98/0x2898 inet_sendmsg+0x94/0xe0 __sys_sendto+0x1e4/0x308 __arm64_sys_sendto+0xc4/0x140 do_el0_svc+0x110/0x280 el0_svc+0x20/0x60 el0t_64_sync_handler+0x104/0x138 el0t_64_sync+0x154/0x158 To avoid that, empty vif's txq at ieee80211_do_stop() so no packet could be dequeued after ieee80211_do_stop() (new packets cannot be queued because SDATA_STATE_RUNNING is cleared at this point). Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Remi Pommarel Link: https://patch.msgid.link/ff7849e268562456274213c0476e09481a48f489.1742833382.git.repk@triplefau.lt Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b0423046028c..183b63235000 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -659,6 +659,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ieee80211_txq_remove_vlan(local, sdata); + if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + sdata->bss = NULL; if (local->open_count == 0) From ff4ec537e48cfb84400f52ad102f6d82fe934580 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Mar 2025 17:36:00 +0300 Subject: [PATCH 08/81] wifi: iwlwifi: mld: silence uninitialized variable warning The "resp_len" isn't initialized if iwl_dhc_resp_data() fails. Fixes: b611cf6b57a8 ("wifi: iwlwifi: mld: add support for DHC_TOOLS_UMAC_GET_TAS_STATUS command") Signed-off-by: Dan Carpenter Acked-by: Miri Korenblit Link: https://patch.msgid.link/add9c9e2-3b44-4e0a-a4aa-7326f6425baf@stanley.mountain [fix typo in commit message] Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c index 453ce2ba39d1..89d95e9b4f30 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/debugfs.c @@ -396,8 +396,8 @@ static ssize_t iwl_dbgfs_tas_get_status_read(struct iwl_mld *mld, char *buf, .data[0] = &cmd, }; struct iwl_dhc_tas_status_resp *resp = NULL; + u32 resp_len = 0; ssize_t pos = 0; - u32 resp_len; u32 status; int ret; From 9e935c0fe3f806ff700a804c00832f0f340b6061 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Mar 2025 16:04:24 +0530 Subject: [PATCH 09/81] wifi: brcmfmac: fix memory leak in brcmf_get_module_param The memory allocated for settings is not freed when brcmf_of_probe fails. Fix that by freeing settings before returning in error path. Fixes: 0ff0843310b7 ("wifi: brcmfmac: Add optional lpo clock enable support") Signed-off-by: Abdun Nihaal Acked-by: Arend van Spriel Link: https://patch.msgid.link/20250330103425.44197-1-abdun.nihaal@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c index cfcf01eb0daa..f26e4679e4ff 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.c @@ -561,8 +561,10 @@ struct brcmf_mp_device *brcmf_get_module_param(struct device *dev, if (!found) { /* No platform data for this device, try OF and DMI data */ brcmf_dmi_probe(settings, chip, chiprev); - if (brcmf_of_probe(dev, bus_type, settings) == -EPROBE_DEFER) + if (brcmf_of_probe(dev, bus_type, settings) == -EPROBE_DEFER) { + kfree(settings); return ERR_PTR(-EPROBE_DEFER); + } brcmf_acpi_probe(dev, bus_type, settings); } return settings; From a0f0dc96de03ffeefc2a177b7f8acde565cb77f4 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sun, 30 Mar 2025 16:15:32 +0530 Subject: [PATCH 10/81] wifi: wl1251: fix memory leak in wl1251_tx_work The skb dequeued from tx_queue is lost when wl1251_ps_elp_wakeup fails with a -ETIMEDOUT error. Fix that by queueing the skb back to tx_queue. Fixes: c5483b719363 ("wl12xx: check if elp wakeup failed") Signed-off-by: Abdun Nihaal Reviewed-by: Michael Nemanov Link: https://patch.msgid.link/20250330104532.44935-1-abdun.nihaal@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wl1251/tx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wl1251/tx.c b/drivers/net/wireless/ti/wl1251/tx.c index 474b603c121c..adb4840b0489 100644 --- a/drivers/net/wireless/ti/wl1251/tx.c +++ b/drivers/net/wireless/ti/wl1251/tx.c @@ -342,8 +342,10 @@ void wl1251_tx_work(struct work_struct *work) while ((skb = skb_dequeue(&wl->tx_queue))) { if (!woken_up) { ret = wl1251_ps_elp_wakeup(wl); - if (ret < 0) + if (ret < 0) { + skb_queue_head(&wl->tx_queue, skb); goto out; + } woken_up = true; } From eb73b5a9157221f405b4fe32751da84ee46b7a25 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 1 Apr 2025 13:02:08 -0400 Subject: [PATCH 11/81] Bluetooth: hci_event: Fix sending MGMT_EV_DEVICE_FOUND for invalid address This fixes sending MGMT_EV_DEVICE_FOUND for invalid address (00:00:00:00:00:00) which is a regression introduced by a2ec905d1e16 ("Bluetooth: fix kernel oops in store_pending_adv_report") since in the attempt to skip storing data for extended advertisement it actually made the code to skip the entire if statement supposed to send MGMT_EV_DEVICE_FOUND without attempting to use the last_addr_adv which is garanteed to be invalid for extended advertisement since we never store anything on it. Link: https://github.com/bluez/bluez/issues/1157 Link: https://github.com/bluez/bluez/issues/1149#issuecomment-2767215658 Fixes: a2ec905d1e16 ("Bluetooth: fix kernel oops in store_pending_adv_report") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1d8616f2e740..5f808f0b0e9a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -6160,11 +6160,12 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * event or send an immediate device found event if the data * should not be stored for later. */ - if (!ext_adv && !has_pending_adv_report(hdev)) { + if (!has_pending_adv_report(hdev)) { /* If the report will trigger a SCAN_REQ store it for * later merging. */ - if (type == LE_ADV_IND || type == LE_ADV_SCAN_IND) { + if (!ext_adv && (type == LE_ADV_IND || + type == LE_ADV_SCAN_IND)) { store_pending_adv_report(hdev, bdaddr, bdaddr_type, rssi, flags, data, len); return; From 324dddea321078a6eeb535c2bff5257be74c9799 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 2 Apr 2025 14:01:41 +0300 Subject: [PATCH 12/81] Bluetooth: btrtl: Prevent potential NULL dereference The btrtl_initialize() function checks that rtl_load_file() either had an error or it loaded a zero length file. However, if it loaded a zero length file then the error code is not set correctly. It results in an error pointer vs NULL bug, followed by a NULL pointer dereference. This was detected by Smatch: drivers/bluetooth/btrtl.c:592 btrtl_initialize() warn: passing zero to 'ERR_PTR' Fixes: 26503ad25de8 ("Bluetooth: btrtl: split the device initialization into smaller parts") Signed-off-by: Dan Carpenter Reviewed-by: Hans de Goede Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index d3eba0d4a57d..7838c89e529e 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1215,6 +1215,8 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, rtl_dev_err(hdev, "mandatory config file %s not found", btrtl_dev->ic_info->cfg_name); ret = btrtl_dev->cfg_len; + if (!ret) + ret = -EINVAL; goto err_free; } } From e92900c9803fb35ad6cf599cb268b8ddd9f91940 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Tue, 1 Apr 2025 18:04:03 +0300 Subject: [PATCH 13/81] Bluetooth: qca: fix NV variant for one of WCN3950 SoCs The QCA_WCN3950_SOC_ID_S should be using qca/cmnv13s.bin, rather than qca/cmnv13u.bin file. Correct the variant suffix to be used for this SoC ID. Fixes: d5712c511cb3 ("Bluetooth: qca: add WCN3950 support") Reported-by: Wojciech Slenska Closes: https://github.com/qualcomm-linux/meta-qcom/pull/817#discussion_r2022866431 Signed-off-by: Dmitry Baryshkov Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btqca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 3d6778b95e00..edefb9dc76aa 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -889,7 +889,7 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate, if (le32_to_cpu(ver.soc_id) == QCA_WCN3950_SOC_ID_T) variant = "t"; else if (le32_to_cpu(ver.soc_id) == QCA_WCN3950_SOC_ID_S) - variant = "u"; + variant = "s"; snprintf(config.fwname, sizeof(config.fwname), "qca/cmnv%02x%s.bin", rom_ver, variant); From c174cd0945ad3f1b7e48781e0d22b94f9e89b28b Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Thu, 3 Apr 2025 19:57:59 +0300 Subject: [PATCH 14/81] Bluetooth: increment TX timestamping tskey always for stream sockets Documentation/networking/timestamping.rst implies TX timestamping OPT_ID tskey increments for each sendmsg. In practice: TCP socket increments it for all sendmsg, timestamping on or off, but UDP only when timestamping is on. The user-visible counter resets when OPT_ID is turned on, so difference can be seen only if timestamping is enabled for some packets only (eg. via SO_TIMESTAMPING CMSG). Fix BT sockets to work in the same way: stream sockets increment tskey for all sendmsg (seqpacket already increment for timestamped only). Fixes: 134f4b39df7b ("Bluetooth: add support for skb TX SND/COMPLETION timestamping") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 95972fd4c784..7e1b53857648 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3072,6 +3072,7 @@ void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, const struct sockcm_cookie *sockc) { struct sock *sk = skb ? skb->sk : NULL; + int key; /* This shall be called on a single skb of those generated by user * sendmsg(), and only when the sendmsg() does not return error to @@ -3087,13 +3088,16 @@ void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); + if (sk->sk_type == SOCK_STREAM) + key = atomic_add_return(key_offset, &sk->sk_tskey); + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { skb_shinfo(skb)->tskey = sockc->ts_opt_id; } else { - int key = atomic_add_return(key_offset, &sk->sk_tskey); - + if (sk->sk_type != SOCK_STREAM) + key = atomic_inc_return(&sk->sk_tskey); skb_shinfo(skb)->tskey = key - 1; } } From 61a9c6e39c8dfe2fd56dee44f817cf2a1b3f3c71 Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Thu, 3 Apr 2025 20:32:23 +0530 Subject: [PATCH 15/81] Bluetooth: btnxpuart: Revert baudrate change in nxp_shutdown This reverts the change baudrate logic in nxp_shutdown. Earlier, when the driver was removed, it restored the controller baudrate to fw_init_baudrate, so that on re-loading the driver, things work fine. However, if the driver was removed while hci0 interface is down, the change baudrate vendor command could not be sent by the driver. When the driver was re-loaded, host and controller baudrate would be mismatched and hci initialization would fail. The only way to recover would be to reboot the system. This issue was fixed by moving the restore baudrate logic from nxp_serdev_remove() to nxp_shutdown(). This fix however caused another issue with the command "hciconfig hci0 reset", which makes hci0 DOWN and UP immediately. Running "bluetoothctl power off" and "bluetoothctl power on" in a tight loop works fine. To maintain support for "hciconfig reset" command, the above mentioned fix is reverted. Fixes: 6fca6781d19d ("Bluetooth: btnxpuart: Move vendor specific initialization to .post_init") Signed-off-by: Neeraj Sanjay Kale Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btnxpuart.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 5091dea762a0..2d58b8be2c5f 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1445,9 +1445,6 @@ static int nxp_shutdown(struct hci_dev *hdev) /* HCI_NXP_IND_RESET command may not returns any response */ if (!IS_ERR(skb)) kfree_skb(skb); - } else if (nxpdev->current_baudrate != nxpdev->fw_init_baudrate) { - nxpdev->new_baudrate = nxpdev->fw_init_baudrate; - nxp_set_baudrate_cmd(hdev, NULL); } return 0; @@ -1799,13 +1796,15 @@ static void nxp_serdev_remove(struct serdev_device *serdev) clear_bit(BTNXPUART_FW_DOWNLOADING, &nxpdev->tx_state); wake_up_interruptible(&nxpdev->check_boot_sign_wait_q); wake_up_interruptible(&nxpdev->fw_dnld_done_wait_q); - } - - if (test_bit(HCI_RUNNING, &hdev->flags)) { - /* Ensure shutdown callback is executed before unregistering, so - * that baudrate is reset to initial value. + } else { + /* Restore FW baudrate to fw_init_baudrate if changed. + * This will ensure FW baudrate is in sync with + * driver baudrate in case this driver is re-inserted. */ - nxp_shutdown(hdev); + if (nxpdev->current_baudrate != nxpdev->fw_init_baudrate) { + nxpdev->new_baudrate = nxpdev->fw_init_baudrate; + nxp_set_baudrate_cmd(hdev, NULL); + } } ps_cleanup(nxpdev); From 103308e50db92d1e705cd9818aaf7fb327c14fad Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Thu, 3 Apr 2025 20:32:22 +0530 Subject: [PATCH 16/81] Bluetooth: btnxpuart: Add an error message if FW dump trigger fails This prints an error message if the FW Dump trigger command fails. This scenario is mainly observed in legacy chipsets 8987 and 8997 and also IW416, where this feature is unavailable due to memory constraints. Fixes: 998e447f443f ("Bluetooth: btnxpuart: Add support for HCI coredump feature") Signed-off-by: Neeraj Sanjay Kale Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btnxpuart.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 2d58b8be2c5f..604ab2bba231 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1286,7 +1286,9 @@ static void nxp_coredump(struct hci_dev *hdev) u8 pcmd = 2; skb = nxp_drv_send_cmd(hdev, HCI_NXP_TRIGGER_DUMP, 1, &pcmd); - if (!IS_ERR(skb)) + if (IS_ERR(skb)) + bt_dev_err(hdev, "Failed to trigger FW Dump. (%ld)", PTR_ERR(skb)); + else kfree_skb(skb); } From 522e9ed157e3c21b4dd623c79967f72c21e45b78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Wed, 9 Apr 2025 10:53:06 +0200 Subject: [PATCH 17/81] Bluetooth: l2cap: Check encryption key size on incoming connection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required for passing GAP/SEC/SEM/BI-04-C PTS test case: Security Mode 4 Level 4, Responder - Invalid Encryption Key Size - 128 bit This tests the security key with size from 1 to 15 bytes while the Security Mode 4 Level 4 requests 16 bytes key size. Currently PTS fails with the following logs: - expected:Connection Response: Code: [3 (0x03)] Code Identifier: (lt)WildCard: Exists(gt) Length: [8 (0x0008)] Destination CID: (lt)WildCard: Exists(gt) Source CID: [64 (0x0040)] Result: [3 (0x0003)] Connection refused - Security block Status: (lt)WildCard: Exists(gt), but received:Connection Response: Code: [3 (0x03)] Code Identifier: [1 (0x01)] Length: [8 (0x0008)] Destination CID: [64 (0x0040)] Source CID: [64 (0x0040)] Result: [0 (0x0000)] Connection Successful Status: [0 (0x0000)] No further information available And HCI logs: < HCI Command: Read Encrypti.. (0x05|0x0008) plen 2 Handle: 14 Address: 00:1B:DC:F2:24:10 (Vencer Co., Ltd.) > HCI Event: Command Complete (0x0e) plen 7 Read Encryption Key Size (0x05|0x0008) ncmd 1 Status: Success (0x00) Handle: 14 Address: 00:1B:DC:F2:24:10 (Vencer Co., Ltd.) Key size: 7 > ACL Data RX: Handle 14 flags 0x02 dlen 12 L2CAP: Connection Request (0x02) ident 1 len 4 PSM: 4097 (0x1001) Source CID: 64 < ACL Data TX: Handle 14 flags 0x00 dlen 16 L2CAP: Connection Response (0x03) ident 1 len 8 Destination CID: 64 Source CID: 64 Result: Connection successful (0x0000) Status: No further information available (0x0000) Fixes: 288c06973daa ("Bluetooth: Enforce key size of 16 bytes on FIPS level") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c7b66b2ea9f2..f1c4b8bd7a8b 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -3991,7 +3991,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(L2CAP_PSM_SDP) && - !hci_conn_check_link_mode(conn->hcon)) { + (!hci_conn_check_link_mode(conn->hcon) || + !l2cap_check_enc_key_size(conn->hcon))) { conn->disc_reason = HCI_ERROR_AUTH_FAILURE; result = L2CAP_CR_SEC_BLOCK; goto response; From 6afd0a3c7ecb5049d75801a3efda0ada70483bd0 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 9 Apr 2025 09:31:53 -0700 Subject: [PATCH 18/81] io_uring/zcrx: enable tcp-data-split in selftest For bnxt when the agg ring is used then tcp-data-split is automatically reported to be enabled, but __net_mp_open_rxq() requires tcp-data-split to be explicitly enabled by the user. Enable tcp-data-split explicitly in io_uring zc rx selftest. Signed-off-by: David Wei Link: https://patch.msgid.link/20250409163153.2747918-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/iou-zcrx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9f271ab6ec04..6a0378e06cab 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -35,6 +35,7 @@ def test_zcrx(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -48,6 +49,7 @@ def test_zcrx(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -59,6 +61,7 @@ def test_zcrx_oneshot(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -72,6 +75,7 @@ def test_zcrx_oneshot(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def main() -> None: From cfe82469a00f0c0983bf4652de3a2972637dfc56 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 8 Apr 2025 13:46:17 -0400 Subject: [PATCH 19/81] ipv6: add exception routes to GC list in rt6_insert_exception Commit 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.") introduced a separated list for managing route expiration via the GC timer. However, it missed adding exception routes (created by ip6_rt_update_pmtu() and rt6_do_redirect()) to this GC list. As a result, these exceptions were never considered for expiration and removal, leading to stale entries persisting in the routing table. This patch fixes the issue by calling fib6_add_gc_list() in rt6_insert_exception(), ensuring that exception routes are properly tracked and garbage collected when expired. Fixes: 5eb902b8e719 ("net/ipv6: Remove expired routes with a separated list of routes.") Reported-by: Jianlin Shi Signed-off-by: Xin Long Reviewed-by: David Ahern Link: https://patch.msgid.link/837e7506ffb63f47faa2b05d9b85481aad28e1a4.1744134377.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv6/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 210b84cecc24..96f1621e2381 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1771,6 +1771,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, if (!err) { spin_lock_bh(&f6i->fib6_table->tb6_lock); fib6_update_sernum(net, f6i); + fib6_add_gc_list(f6i); spin_unlock_bh(&f6i->fib6_table->tb6_lock); fib6_force_start_gc(net); } From 5f05c14e7c198415abe936514a6905f8b545b63b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 11 Apr 2025 10:40:54 +0200 Subject: [PATCH 20/81] wifi: iwlwifi: pcie: set state to no-FW before reset handshake The reset handshake attempts to kill the firmware, and it'll go into a pretty much dead state once we do that. However, if it times out, then we'll attempt to dump the firmware to be able to see why it didn't respond. During this dump, we cannot treat it as if it was still running, since we just tried to kill it, otherwise dumping will attempt to send a DBGC stop command. As this command will time out, we'll go into a reset loop. For now, fix this by setting the trans->state to say firmware isn't running before doing the reset handshake. In the longer term, we should clean up the way this state is handled. It's not entirely clear but it seems likely that this issue was introduced by my rework of the error handling, prior to that it would've been synchronous at that point and (I think) not have attempted to reset since it was already doing down. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219967 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219968 Fixes: 7391b2a4f7db ("wifi: iwlwifi: rework firmware error handling") Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20250411104054.63aa4f56894d.Ife70cfe997db03f0d07fdef2b164695739a05a63@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index 3ece34e30d58..472f26f83ba8 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -147,8 +147,14 @@ static void _iwl_trans_pcie_gen2_stop_device(struct iwl_trans *trans) return; if (trans->state >= IWL_TRANS_FW_STARTED && - trans_pcie->fw_reset_handshake) + trans_pcie->fw_reset_handshake) { + /* + * Reset handshake can dump firmware on timeout, but that + * should assume that the firmware is already dead. + */ + trans->state = IWL_TRANS_NO_FW; iwl_trans_pcie_fw_reset_handshake(trans); + } trans_pcie->is_down = true; From 575fe08c221567cdbf63e078baecaeaed08a1d17 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 11 Apr 2025 12:21:05 +0200 Subject: [PATCH 21/81] wifi: iwlwifi: mld: Restart firmware on iwl_mld_no_wowlan_resume() error Commit 44605365f935 ("iwlwifi: mld: fix building with CONFIG_PM_SLEEP disabled") sought to fix build breakage, but inadvertently introduced a new issue: iwl_mld_mac80211_start() no longer calls iwl_mld_start_fw() after having called iwl_mld_stop_fw() in the error path of iwl_mld_no_wowlan_resume(). Fix it. Fixes: 44605365f935 ("iwlwifi: mld: fix building with CONFIG_PM_SLEEP disabled") Reported-by: Miri Korenblit Closes: https://lore.kernel.org/r/MW5PR11MB58106D6BC6403845C330C7AAA3A22@MW5PR11MB5810.namprd11.prod.outlook.com/ Signed-off-by: Lukas Wunner Acked-by: Miri Korenblit Acked-by: Arnd Bergmann Link: https://patch.msgid.link/d3ba1006a1b72ceb58c593fa62b9bd7c73e2e4ed.1744366815.git.lukas@wunner.de Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mld/mac80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c index 0b5bc5abb82d..99e13cfd1e5f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/mac80211.c @@ -475,8 +475,8 @@ static int iwl_mld_mac80211_start(struct ieee80211_hw *hw) { struct iwl_mld *mld = IWL_MAC80211_GET_MLD(hw); - int ret; bool in_d3 = false; + int ret = 0; lockdep_assert_wiphy(mld->wiphy); @@ -501,7 +501,7 @@ int iwl_mld_mac80211_start(struct ieee80211_hw *hw) iwl_mld_restart_cleanup(mld); } - if (!in_d3) { + if (!in_d3 || ret) { ret = iwl_mld_start_fw(mld); if (ret) goto error; From 0937cb5f345c79d702b4d0d744e2a2529b551cb2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 11 Apr 2025 16:13:34 +0200 Subject: [PATCH 22/81] Revert "wifi: mac80211: Update skb's control block key in ieee80211_tx_dequeue()" This reverts commit a104042e2bf6528199adb6ca901efe7b60c2c27f. Since the original bug seems to have been around for years, but a new issue was report with the fix, revert the fix for now. We have a couple of weeks to figure it out for this release, if needed. Reported-by: Bert Karwatzki Closes: https://lore.kernel.org/linux-wireless/20250410215527.3001-1-spasswolf@web.de Fixes: a104042e2bf6 ("wifi: mac80211: Update skb's control block key in ieee80211_tx_dequeue()") Signed-off-by: Johannes Berg --- net/mac80211/tx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 34f229a6eab0..20179db88c4a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3894,7 +3894,6 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, * The key can be removed while the packet was queued, so need to call * this here to get the current key. */ - info->control.hw_key = NULL; r = ieee80211_tx_h_select_key(&tx); if (r != TX_CONTINUE) { ieee80211_free_txskb(&local->hw, skb); From 8e404ad95d2c10c261e2ef6992c7c12dde03df0e Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:29 -0700 Subject: [PATCH 23/81] igc: fix PTM cycle trigger logic Writing to clear the PTM status 'valid' bit while the PTM cycle is triggered results in unreliable PTM operation. To fix this, clear the PTM 'trigger' and status after each PTM transaction. The issue can be reproduced with the following: $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to quickly reproduce the issue. PHC2SYS exits with: "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction fails This patch also fixes a hang in igc_probe() when loading the igc driver in the kdump kernel on systems supporting PTM. The igc driver running in the base kernel enables PTM trigger in igc_probe(). Therefore the driver is always in PTM trigger mode, except in brief periods when manually triggering a PTM cycle. When a crash occurs, the NIC is reset while PTM trigger is enabled. Due to a hardware problem, the NIC is subsequently in a bad busmaster state and doesn't handle register reads/writes. When running igc_probe() in the kdump kernel, the first register access to a NIC register hangs driver probing and ultimately breaks kdump. With this patch, igc has PTM trigger disabled most of the time, and the trigger is only enabled for very brief (10 - 100 us) periods when manually triggering a PTM cycle. Chances that a crash occurs during a PTM trigger are not 0, but extremely reduced. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Reviewed-by: Michal Swiatkowski Tested-by: Mor Bar-Gabay Tested-by: Avigail Dahan Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Corinna Vinschen Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 1 + drivers/net/ethernet/intel/igc/igc_ptp.c | 70 ++++++++++++-------- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 8e449904aa7d..2ff292f5f63b 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -593,6 +593,7 @@ #define IGC_PTM_STAT_T4M1_OVFL BIT(3) /* T4 minus T1 overflow */ #define IGC_PTM_STAT_ADJUST_1ST BIT(4) /* 1588 timer adjusted during 1st PTM cycle */ #define IGC_PTM_STAT_ADJUST_CYC BIT(5) /* 1588 timer adjusted during non-1st PTM cycle */ +#define IGC_PTM_STAT_ALL GENMASK(5, 0) /* Used to clear all status */ /* PCIe PTM Cycle Control */ #define IGC_PTM_CYCLE_CTRL_CYC_TIME(msec) ((msec) & 0x3ff) /* PTM Cycle Time (msec) */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 946edbad4302..c640e346342b 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -974,13 +974,40 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat) } } +static void igc_ptm_trigger(struct igc_hw *hw) +{ + u32 ctrl; + + /* To "manually" start the PTM cycle we need to set the + * trigger (TRIG) bit + */ + ctrl = rd32(IGC_PTM_CTRL); + ctrl |= IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Perform flush after write to CTRL register otherwise + * transaction may not start + */ + wrfl(); +} + +static void igc_ptm_reset(struct igc_hw *hw) +{ + u32 ctrl; + + ctrl = rd32(IGC_PTM_CTRL); + ctrl &= ~IGC_PTM_CTRL_TRIG; + wr32(IGC_PTM_CTRL, ctrl); + /* Write to clear all status */ + wr32(IGC_PTM_STAT, IGC_PTM_STAT_ALL); +} + static int igc_phc_get_syncdevicetime(ktime_t *device, struct system_counterval_t *system, void *ctx) { - u32 stat, t2_curr_h, t2_curr_l, ctrl; struct igc_adapter *adapter = ctx; struct igc_hw *hw = &adapter->hw; + u32 stat, t2_curr_h, t2_curr_l; int err, count = 100; ktime_t t1, t2_curr; @@ -994,25 +1021,13 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, * are transitory. Repeating the process returns valid * data eventually. */ - - /* To "manually" start the PTM cycle we need to clear and - * then set again the TRIG bit. - */ - ctrl = rd32(IGC_PTM_CTRL); - ctrl &= ~IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - ctrl |= IGC_PTM_CTRL_TRIG; - wr32(IGC_PTM_CTRL, ctrl); - - /* The cycle only starts "for real" when software notifies - * that it has read the registers, this is done by setting - * VALID bit. - */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, stat, IGC_PTM_STAT_SLEEP, IGC_PTM_STAT_TIMEOUT); + igc_ptm_reset(hw); + if (err < 0) { netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); return err; @@ -1021,15 +1036,7 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, if ((stat & IGC_PTM_STAT_VALID) == IGC_PTM_STAT_VALID) break; - if (stat & ~IGC_PTM_STAT_VALID) { - /* An error occurred, log it. */ - igc_ptm_log_error(adapter, stat); - /* The STAT register is write-1-to-clear (W1C), - * so write the previous error status to clear it. - */ - wr32(IGC_PTM_STAT, stat); - continue; - } + igc_ptm_log_error(adapter, stat); } while (--count); if (!count) { @@ -1255,7 +1262,7 @@ void igc_ptp_stop(struct igc_adapter *adapter) void igc_ptp_reset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - u32 cycle_ctrl, ctrl; + u32 cycle_ctrl, ctrl, stat; unsigned long flags; u32 timadj; @@ -1290,14 +1297,19 @@ void igc_ptp_reset(struct igc_adapter *adapter) ctrl = IGC_PTM_CTRL_EN | IGC_PTM_CTRL_START_NOW | IGC_PTM_CTRL_SHRT_CYC(IGC_PTM_SHORT_CYC_DEFAULT) | - IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT) | - IGC_PTM_CTRL_TRIG; + IGC_PTM_CTRL_PTM_TO(IGC_PTM_TIMEOUT_DEFAULT); wr32(IGC_PTM_CTRL, ctrl); /* Force the first cycle to run. */ - wr32(IGC_PTM_STAT, IGC_PTM_STAT_VALID); + igc_ptm_trigger(hw); + + if (readx_poll_timeout_atomic(rd32, IGC_PTM_STAT, stat, + stat, IGC_PTM_STAT_SLEEP, + IGC_PTM_STAT_TIMEOUT)) + netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); + igc_ptm_reset(hw); break; default: /* No work to do. */ From 714cd033da6fea4cf54a11b3cfd070afde3f31df Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:30 -0700 Subject: [PATCH 24/81] igc: increase wait time before retrying PTM The i225/i226 hardware retries if it receives an inappropriate response from the upstream device. If the device retries too quickly, the root port does not respond. The wait between attempts was reduced from 10us to 1us in commit 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us"), which said: With the 10us interval, we were seeing PTM transactions take around 12us. Hardware team suggested this interval could be lowered to 1us which was confirmed with PCIe sniffer. With the 1us interval, PTM dialogs took around 2us. While a 1us short cycle time was thought to be theoretically sufficient, it turns out in practice it is not quite long enough. It is unclear if the problem is in the root port or an issue in i225/i226. Increase the wait from 1us to 4us. Increasing to 2us appeared to work in practice on the setups we have available. A value of 4us was chosen due to the limited hardware available for testing, with a goal of ensuring we wait long enough without overly penalizing the response time when unnecessary. The issue can be reproduced with the following: $ sudo phc2sys -R 1000 -O 0 -i tsn0 -m Note: 1000 Hz (-R 1000) is unrealistically large, but provides a way to quickly reproduce the issue. PHC2SYS exits with: "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction fails Fixes: 6b8aa753a9f9 ("igc: Decrease PTM short interval from 10 us to 1 us") Reviewed-by: Michal Swiatkowski Tested-by: Mor Bar-Gabay Tested-by: Avigail Dahan Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_defines.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 2ff292f5f63b..d19325b0e6e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -574,7 +574,10 @@ #define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) -#define IGC_PTM_SHORT_CYC_DEFAULT 1 /* Default short cycle interval */ +/* A short cycle time of 1us theoretically should work, but appears to be too + * short in practice. + */ +#define IGC_PTM_SHORT_CYC_DEFAULT 4 /* Default short cycle interval */ #define IGC_PTM_CYC_TIME_DEFAULT 5 /* Default PTM cycle time */ #define IGC_PTM_TIMEOUT_DEFAULT 255 /* Default timeout for PTM errors */ From cd7f7328d691937102732f39f97ead35b15bf803 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:31 -0700 Subject: [PATCH 25/81] igc: move ktime snapshot into PTM retry loop Move ktime_get_snapshot() into the loop. If a retry does occur, a more recent snapshot will result in a more accurate cross-timestamp. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Reviewed-by: Michal Swiatkowski Tested-by: Mor Bar-Gabay Tested-by: Avigail Dahan Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index c640e346342b..516abe7405de 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -1011,16 +1011,16 @@ static int igc_phc_get_syncdevicetime(ktime_t *device, int err, count = 100; ktime_t t1, t2_curr; - /* Get a snapshot of system clocks to use as historic value. */ - ktime_get_snapshot(&adapter->snapshot); - + /* Doing this in a loop because in the event of a + * badly timed (ha!) system clock adjustment, we may + * get PTM errors from the PCI root, but these errors + * are transitory. Repeating the process returns valid + * data eventually. + */ do { - /* Doing this in a loop because in the event of a - * badly timed (ha!) system clock adjustment, we may - * get PTM errors from the PCI root, but these errors - * are transitory. Repeating the process returns valid - * data eventually. - */ + /* Get a snapshot of system clocks to use as historic value. */ + ktime_get_snapshot(&adapter->snapshot); + igc_ptm_trigger(hw); err = readx_poll_timeout(rd32, IGC_PTM_STAT, stat, From 26a3910afd111f7c1a96dace6dc02f3225063896 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:32 -0700 Subject: [PATCH 26/81] igc: handle the IGC_PTP_ENABLED flag correctly All functions in igc_ptp.c called from igc_main.c should check the IGC_PTP_ENABLED flag. Adding check for this flag to stop and reset functions. Fixes: 5f2958052c58 ("igc: Add basic skeleton for PTP") Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Mor Bar-Gabay Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 516abe7405de..343205bffc35 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -1244,8 +1244,12 @@ void igc_ptp_suspend(struct igc_adapter *adapter) **/ void igc_ptp_stop(struct igc_adapter *adapter) { + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + igc_ptp_suspend(adapter); + adapter->ptp_flags &= ~IGC_PTP_ENABLED; if (adapter->ptp_clock) { ptp_clock_unregister(adapter->ptp_clock); netdev_info(adapter->netdev, "PHC removed\n"); @@ -1266,6 +1270,9 @@ void igc_ptp_reset(struct igc_adapter *adapter) unsigned long flags; u32 timadj; + if (!(adapter->ptp_flags & IGC_PTP_ENABLED)) + return; + /* reset the tstamp_config */ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); From 1f025759ba394dd53e434d2668cb0597886d9b69 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:33 -0700 Subject: [PATCH 27/81] igc: cleanup PTP module if probe fails Make sure that the PTP module is cleaned up if the igc_probe() fails by calling igc_ptp_stop() on exit. Fixes: d89f88419f99 ("igc: Add skeletal frame for Intel(R) 2.5G Ethernet Controller support") Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Mor Bar-Gabay Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f1330379e6bb..b1669d7cf435 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7231,6 +7231,7 @@ static int igc_probe(struct pci_dev *pdev, err_register: igc_release_hw_control(adapter); + igc_ptp_stop(adapter); err_eeprom: if (!igc_check_reset_block(hw)) igc_reset_phy(hw); From 1a931c4f5e6862e61a4b130cb76b422e1415f644 Mon Sep 17 00:00:00 2001 From: Christopher S M Hall Date: Tue, 1 Apr 2025 16:35:34 -0700 Subject: [PATCH 28/81] igc: add lock preventing multiple simultaneous PTM transactions Add a mutex around the PTM transaction to prevent multiple transactors Multiple processes try to initiate a PTM transaction, one or all may fail. This can be reproduced by running two instances of the following: $ sudo phc2sys -O 0 -i tsn0 -m PHC2SYS exits with: "ioctl PTP_OFFSET_PRECISE: Connection timed out" when the PTM transaction fails Note: Normally two instance of PHC2SYS will not run, but one process should not break another. Fixes: a90ec8483732 ("igc: Add support for PTP getcrosststamp()") Signed-off-by: Christopher S M Hall Reviewed-by: Corinna Vinschen Signed-off-by: Jacob Keller Tested-by: Mor Bar-Gabay Acked-by: Vinicius Costa Gomes Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_ptp.c | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index c35cc5cb1185..2f265c0959c7 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -319,6 +319,7 @@ struct igc_adapter { struct timespec64 prev_ptp_time; /* Pre-reset PTP clock */ ktime_t ptp_reset_start; /* Reset time in clock mono */ struct system_time_snapshot snapshot; + struct mutex ptm_lock; /* Only allow one PTM transaction at a time */ char fw_version[32]; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 343205bffc35..612ed26a29c5 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -974,6 +974,7 @@ static void igc_ptm_log_error(struct igc_adapter *adapter, u32 ptm_stat) } } +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_trigger() */ static void igc_ptm_trigger(struct igc_hw *hw) { u32 ctrl; @@ -990,6 +991,7 @@ static void igc_ptm_trigger(struct igc_hw *hw) wrfl(); } +/* The PTM lock: adapter->ptm_lock must be held when calling igc_ptm_reset() */ static void igc_ptm_reset(struct igc_hw *hw) { u32 ctrl; @@ -1068,9 +1070,16 @@ static int igc_ptp_getcrosststamp(struct ptp_clock_info *ptp, { struct igc_adapter *adapter = container_of(ptp, struct igc_adapter, ptp_caps); + int ret; - return get_device_system_crosststamp(igc_phc_get_syncdevicetime, - adapter, &adapter->snapshot, cts); + /* This blocks until any in progress PTM transactions complete */ + mutex_lock(&adapter->ptm_lock); + + ret = get_device_system_crosststamp(igc_phc_get_syncdevicetime, + adapter, &adapter->snapshot, cts); + mutex_unlock(&adapter->ptm_lock); + + return ret; } static int igc_ptp_getcyclesx64(struct ptp_clock_info *ptp, @@ -1169,6 +1178,7 @@ void igc_ptp_init(struct igc_adapter *adapter) spin_lock_init(&adapter->ptp_tx_lock); spin_lock_init(&adapter->free_timer_lock); spin_lock_init(&adapter->tmreg_lock); + mutex_init(&adapter->ptm_lock); adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; @@ -1181,6 +1191,7 @@ void igc_ptp_init(struct igc_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; netdev_err(netdev, "ptp_clock_register failed\n"); + mutex_destroy(&adapter->ptm_lock); } else if (adapter->ptp_clock) { netdev_info(netdev, "PHC added\n"); adapter->ptp_flags |= IGC_PTP_ENABLED; @@ -1210,10 +1221,12 @@ static void igc_ptm_stop(struct igc_adapter *adapter) struct igc_hw *hw = &adapter->hw; u32 ctrl; + mutex_lock(&adapter->ptm_lock); ctrl = rd32(IGC_PTM_CTRL); ctrl &= ~IGC_PTM_CTRL_EN; wr32(IGC_PTM_CTRL, ctrl); + mutex_unlock(&adapter->ptm_lock); } /** @@ -1255,6 +1268,7 @@ void igc_ptp_stop(struct igc_adapter *adapter) netdev_info(adapter->netdev, "PHC removed\n"); adapter->ptp_flags &= ~IGC_PTP_ENABLED; } + mutex_destroy(&adapter->ptm_lock); } /** @@ -1294,6 +1308,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) if (!igc_is_crosststamp_supported(adapter)) break; + mutex_lock(&adapter->ptm_lock); wr32(IGC_PCIE_DIG_DELAY, IGC_PCIE_DIG_DELAY_DEFAULT); wr32(IGC_PCIE_PHY_DELAY, IGC_PCIE_PHY_DELAY_DEFAULT); @@ -1317,6 +1332,7 @@ void igc_ptp_reset(struct igc_adapter *adapter) netdev_err(adapter->netdev, "Timeout reading IGC_PTM_STAT register\n"); igc_ptm_reset(hw); + mutex_unlock(&adapter->ptm_lock); break; default: /* No work to do. */ From 752e2217d789be2c6a6ac66554b981cd71cd9f31 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 7 Apr 2025 10:03:17 -0700 Subject: [PATCH 29/81] smc: Fix lockdep false-positive for IPPROTO_SMC. SMC consists of two sockets: smc_sock and kernel TCP socket. Currently, there are two ways of creating the sockets, and syzbot reported a lockdep splat [0] for the newer way introduced by commit d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC"). socket(AF_SMC , SOCK_STREAM, SMCPROTO_SMC or SMCPROTO_SMC6) socket(AF_INET or AF_INET6, SOCK_STREAM, IPPROTO_SMC) When a socket is allocated, sock_lock_init() sets a lockdep lock class to sk->sk_lock.slock based on its protocol family. In the IPPROTO_SMC case, AF_INET or AF_INET6 lock class is assigned to smc_sock. The repro sets IPV6_JOIN_ANYCAST for IPv6 UDP and SMC socket and exercises smc_switch_to_fallback() for IPPROTO_SMC. 1. smc_switch_to_fallback() is called under lock_sock() and holds smc->clcsock_release_lock. sk_lock-AF_INET6 -> &smc->clcsock_release_lock (sk_lock-AF_SMC) 2. Setting IPV6_JOIN_ANYCAST to SMC holds smc->clcsock_release_lock and calls setsockopt() for the kernel TCP socket, which holds RTNL and the kernel socket's lock_sock(). &smc->clcsock_release_lock -> rtnl_mutex (-> k-sk_lock-AF_INET6) 3. Setting IPV6_JOIN_ANYCAST to UDP holds RTNL and lock_sock(). rtnl_mutex -> sk_lock-AF_INET6 Then, lockdep detects a false-positive circular locking, .-> sk_lock-AF_INET6 -> &smc->clcsock_release_lock -> rtnl_mutex -. `-----------------------------------------------------------------' but IPPROTO_SMC should have the same locking rule as AF_SMC. sk_lock-AF_SMC -> &smc->clcsock_release_lock -> rtnl_mutex -> k-sk_lock-AF_INET6 Let's set the same lock class for smc_sock. Given AF_SMC uses the same lock class for SMCPROTO_SMC and SMCPROTO_SMC6, we do not need to separate the class for AF_INET and AF_INET6. [0]: WARNING: possible circular locking dependency detected 6.14.0-rc3-syzkaller-00267-gff202c5028a1 #0 Not tainted syz.4.1528/11571 is trying to acquire lock: ffffffff8fef8de8 (rtnl_mutex){+.+.}-{4:4}, at: ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 but task is already holding lock: ffff888027f596a8 (&smc->clcsock_release_lock){+.+.}-{4:4}, at: smc_clcsock_release+0x75/0xe0 net/smc/smc_close.c:30 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&smc->clcsock_release_lock){+.+.}-{4:4}: __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 smc_switch_to_fallback+0x2d/0xa00 net/smc/af_smc.c:903 smc_sendmsg+0x13d/0x520 net/smc/af_smc.c:2781 sock_sendmsg_nosec net/socket.c:718 [inline] __sock_sendmsg net/socket.c:733 [inline] ____sys_sendmsg+0xaaf/0xc90 net/socket.c:2573 ___sys_sendmsg+0x135/0x1e0 net/socket.c:2627 __sys_sendmsg+0x16e/0x220 net/socket.c:2659 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #1 (sk_lock-AF_INET6){+.+.}-{0:0}: lock_sock_nested+0x3a/0xf0 net/core/sock.c:3645 lock_sock include/net/sock.h:1624 [inline] sockopt_lock_sock net/core/sock.c:1133 [inline] sockopt_lock_sock+0x54/0x70 net/core/sock.c:1124 do_ipv6_setsockopt+0x2160/0x4520 net/ipv6/ipv6_sockglue.c:567 ipv6_setsockopt+0xcb/0x170 net/ipv6/ipv6_sockglue.c:993 udpv6_setsockopt+0x7d/0xd0 net/ipv6/udp.c:1850 do_sock_setsockopt+0x222/0x480 net/socket.c:2303 __sys_setsockopt+0x1a0/0x230 net/socket.c:2328 __do_sys_setsockopt net/socket.c:2334 [inline] __se_sys_setsockopt net/socket.c:2331 [inline] __x64_sys_setsockopt+0xbd/0x160 net/socket.c:2331 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x250 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (rtnl_mutex){+.+.}-{4:4}: check_prev_add kernel/locking/lockdep.c:3163 [inline] check_prevs_add kernel/locking/lockdep.c:3282 [inline] validate_chain kernel/locking/lockdep.c:3906 [inline] __lock_acquire+0x249e/0x3c40 kernel/locking/lockdep.c:5228 lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5851 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 inet6_release+0x47/0x70 net/ipv6/af_inet6.c:485 __sock_release net/socket.c:647 [inline] sock_release+0x8e/0x1d0 net/socket.c:675 smc_clcsock_release+0xb7/0xe0 net/smc/smc_close.c:34 __smc_release+0x5c2/0x880 net/smc/af_smc.c:301 smc_release+0x1fc/0x5f0 net/smc/af_smc.c:344 __sock_release+0xb0/0x270 net/socket.c:647 sock_close+0x1c/0x30 net/socket.c:1398 __fput+0x3ff/0xb70 fs/file_table.c:464 task_work_run+0x14e/0x250 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x27b/0x2a0 kernel/entry/common.c:218 do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Chain exists of: rtnl_mutex --> sk_lock-AF_INET6 --> &smc->clcsock_release_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&smc->clcsock_release_lock); lock(sk_lock-AF_INET6); lock(&smc->clcsock_release_lock); lock(rtnl_mutex); *** DEADLOCK *** 2 locks held by syz.4.1528/11571: #0: ffff888077e88208 (&sb->s_type->i_mutex_key#10){+.+.}-{4:4}, at: inode_lock include/linux/fs.h:877 [inline] #0: ffff888077e88208 (&sb->s_type->i_mutex_key#10){+.+.}-{4:4}, at: __sock_release+0x86/0x270 net/socket.c:646 #1: ffff888027f596a8 (&smc->clcsock_release_lock){+.+.}-{4:4}, at: smc_clcsock_release+0x75/0xe0 net/smc/smc_close.c:30 stack backtrace: CPU: 0 UID: 0 PID: 11571 Comm: syz.4.1528 Not tainted 6.14.0-rc3-syzkaller-00267-gff202c5028a1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x116/0x1f0 lib/dump_stack.c:120 print_circular_bug+0x490/0x760 kernel/locking/lockdep.c:2076 check_noncircular+0x31a/0x400 kernel/locking/lockdep.c:2208 check_prev_add kernel/locking/lockdep.c:3163 [inline] check_prevs_add kernel/locking/lockdep.c:3282 [inline] validate_chain kernel/locking/lockdep.c:3906 [inline] __lock_acquire+0x249e/0x3c40 kernel/locking/lockdep.c:5228 lock_acquire.part.0+0x11b/0x380 kernel/locking/lockdep.c:5851 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x19b/0xb10 kernel/locking/mutex.c:730 ipv6_sock_ac_close+0xd9/0x110 net/ipv6/anycast.c:220 inet6_release+0x47/0x70 net/ipv6/af_inet6.c:485 __sock_release net/socket.c:647 [inline] sock_release+0x8e/0x1d0 net/socket.c:675 smc_clcsock_release+0xb7/0xe0 net/smc/smc_close.c:34 __smc_release+0x5c2/0x880 net/smc/af_smc.c:301 smc_release+0x1fc/0x5f0 net/smc/af_smc.c:344 __sock_release+0xb0/0x270 net/socket.c:647 sock_close+0x1c/0x30 net/socket.c:1398 __fput+0x3ff/0xb70 fs/file_table.c:464 task_work_run+0x14e/0x250 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop kernel/entry/common.c:114 [inline] exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline] __syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline] syscall_exit_to_user_mode+0x27b/0x2a0 kernel/entry/common.c:218 do_syscall_64+0xda/0x250 arch/x86/entry/common.c:89 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8b4b38d169 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe4efd22d8 EFLAGS: 00000246 ORIG_RAX: 00000000000001b4 RAX: 0000000000000000 RBX: 00000000000b14a3 RCX: 00007f8b4b38d169 RDX: 0000000000000000 RSI: 000000000000001e RDI: 0000000000000003 RBP: 00007f8b4b5a7ba0 R08: 0000000000000001 R09: 000000114efd25cf R10: 00007f8b4b200000 R11: 0000000000000246 R12: 00007f8b4b5a5fac R13: 00007f8b4b5a5fa0 R14: ffffffffffffffff R15: 00007ffe4efd23f0 Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Reported-by: syzbot+be6f4b383534d88989f7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=be6f4b383534d88989f7 Signed-off-by: Kuniyuki Iwashima Reviewed-by: Wenjia Zhang Link: https://patch.msgid.link/20250407170332.26959-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 3e6cb35baf25..3760131f1484 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -362,6 +362,9 @@ static void smc_destruct(struct sock *sk) return; } +static struct lock_class_key smc_key; +static struct lock_class_key smc_slock_key; + void smc_sk_init(struct net *net, struct sock *sk, int protocol) { struct smc_sock *smc = smc_sk(sk); @@ -375,6 +378,8 @@ void smc_sk_init(struct net *net, struct sock *sk, int protocol) INIT_WORK(&smc->connect_work, smc_connect_work); INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); INIT_LIST_HEAD(&smc->accept_q); + sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key, + "sk_lock-AF_SMC", &smc_key); spin_lock_init(&smc->accept_q_lock); spin_lock_init(&smc->conn.send_lock); sk->sk_prot->hash(sk); From 18c889a9a419dc1662548777b7122d980bccfdad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Thu, 10 Apr 2025 12:43:21 +0200 Subject: [PATCH 30/81] selftests/tc-testing: Add test for echo of big TC filters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a selftest that checks whether the kernel can successfully echo a big tc filter, to test the fix introduced in commit: 369609fc6272 ("tc: Ensure we have enough buffer space when sending filter netlink notifications") Signed-off-by: Toke Høiland-Jørgensen Tested-by: Victor Nogueira Link: https://patch.msgid.link/20250410104322.214620-1-toke@redhat.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/actions.json | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json index 1ba96c467754..d9fc62ab476c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -412,5 +412,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY ingress" ] + }, + { + "id": "33f4", + "name": "Check echo of big filter command", + "category": [ + "infra", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 10: fq_codel" + ], + "cmdUnderTest": "bash -c '$TC -echo filter add dev $DUMMY parent 10: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) | grep \"added filter\"'", + "verifyCmd": "", + "expExitCode": "0", + "matchCount": "0", + "matchPattern": "", + "teardown": [ + "$TC qdisc del dev $DUMMY parent root fq_codel" + ] } ] From 8b82f656826c741d032490b089a5638c33f2c91d Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Wed, 9 Apr 2025 11:14:48 +0530 Subject: [PATCH 31/81] pds_core: fix memory leak in pdsc_debugfs_add_qcq() The memory allocated for intr_ctrl_regset, which is passed to debugfs_create_regset32() may not be cleaned up when the driver is removed. Fix that by using device managed allocation for it. Fixes: 45d76f492938 ("pds_core: set up device and adminq") Signed-off-by: Abdun Nihaal Reviewed-by: Michal Swiatkowski Reviewed-by: Shannon Nelson Link: https://patch.msgid.link/20250409054450.48606-1-abdun.nihaal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amd/pds_core/debugfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index ac37a4e738ae..04c5e3abd8d7 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -154,8 +154,9 @@ void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq) debugfs_create_u32("index", 0400, intr_dentry, &intr->index); debugfs_create_u32("vector", 0400, intr_dentry, &intr->vector); - intr_ctrl_regset = kzalloc(sizeof(*intr_ctrl_regset), - GFP_KERNEL); + intr_ctrl_regset = devm_kzalloc(pdsc->dev, + sizeof(*intr_ctrl_regset), + GFP_KERNEL); if (!intr_ctrl_regset) return; intr_ctrl_regset->regs = intr_ctrl_regs; From f3fdd4fba16c74697d8bc730b82fb7c1eff7fab3 Mon Sep 17 00:00:00 2001 From: Damodharam Ammepalli Date: Wed, 9 Apr 2025 10:33:12 -0700 Subject: [PATCH 32/81] ethtool: cmis_cdb: use correct rpl size in ethtool_cmis_module_poll() rpl is passed as a pointer to ethtool_cmis_module_poll(), so the correct size of rpl is sizeof(*rpl) which should be just 1 byte. Using the pointer size instead can cause stack corruption: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ethtool_cmis_wait_for_cond+0xf4/0x100 CPU: 72 UID: 0 PID: 4440 Comm: kworker/72:2 Kdump: loaded Tainted: G OE 6.11.0 #24 Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE Hardware name: Dell Inc. PowerEdge R760/04GWWM, BIOS 1.6.6 09/20/2023 Workqueue: events module_flash_fw_work Call Trace: panic+0x339/0x360 ? ethtool_cmis_wait_for_cond+0xf4/0x100 ? __pfx_status_success+0x10/0x10 ? __pfx_status_fail+0x10/0x10 __stack_chk_fail+0x10/0x10 ethtool_cmis_wait_for_cond+0xf4/0x100 ethtool_cmis_cdb_execute_cmd+0x1fc/0x330 ? __pfx_status_fail+0x10/0x10 cmis_cdb_module_features_get+0x6d/0xd0 ethtool_cmis_cdb_init+0x8a/0xd0 ethtool_cmis_fw_update+0x46/0x1d0 module_flash_fw_work+0x17/0xa0 process_one_work+0x179/0x390 worker_thread+0x239/0x340 ? __pfx_worker_thread+0x10/0x10 kthread+0xcc/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2d/0x50 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: a39c84d79625 ("ethtool: cmis_cdb: Add a layer for supporting CDB commands") Reviewed-by: Andy Gospodarek Reviewed-by: Simon Horman Reviewed-by: Ido Schimmel Signed-off-by: Damodharam Ammepalli Signed-off-by: Michael Chan Link: https://patch.msgid.link/20250409173312.733012-1-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- net/ethtool/cmis_cdb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c index 0e2691ccb0df..3057576bc81e 100644 --- a/net/ethtool/cmis_cdb.c +++ b/net/ethtool/cmis_cdb.c @@ -351,7 +351,7 @@ ethtool_cmis_module_poll(struct net_device *dev, struct netlink_ext_ack extack = {}; int err; - ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl)); + ethtool_cmis_page_init(&page_data, 0, offset, sizeof(*rpl)); page_data.data = (u8 *)rpl; err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); From 52024cd6ec71a6ca934d0cc12452bd8d49850679 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 10 Apr 2025 11:53:19 +0800 Subject: [PATCH 33/81] net: mctp: Set SOCK_RCU_FREE Bind lookup runs under RCU, so ensure that a socket doesn't go away in the middle of a lookup. Fixes: 833ef3b91de6 ("mctp: Populate socket implementation") Signed-off-by: Matt Johnston Link: https://patch.msgid.link/20250410-mctp-rcu-sock-v1-1-872de9fdc877@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- net/mctp/af_mctp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index dd895617defd..9b12ca97f412 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -630,6 +630,9 @@ static int mctp_sk_hash(struct sock *sk) { struct net *net = sock_net(sk); + /* Bind lookup runs under RCU, remain live during that. */ + sock_set_flag(sk, SOCK_RCU_FREE); + mutex_lock(&net->mctp.bind_lock); sk_add_node_rcu(sk, &net->mctp.binds); mutex_unlock(&net->mctp.bind_lock); From f7a11cba0ed79d9d37941dddf69a8a655c8644bc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 10 Apr 2025 09:11:17 -0700 Subject: [PATCH 34/81] bonding: hold ops lock around get_link syzbot reports a case of ethtool_ops->get_link being called without ops lock: ethtool_op_get_link+0x15/0x60 net/ethtool/ioctl.c:63 bond_check_dev_link+0x1fb/0x4b0 drivers/net/bonding/bond_main.c:864 bond_miimon_inspect drivers/net/bonding/bond_main.c:2734 [inline] bond_mii_monitor+0x49d/0x3170 drivers/net/bonding/bond_main.c:2956 process_one_work kernel/workqueue.c:3238 [inline] process_scheduled_works+0xac3/0x18e0 kernel/workqueue.c:3319 worker_thread+0x870/0xd50 kernel/workqueue.c:3400 kthread+0x7b7/0x940 kernel/kthread.c:464 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:153 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Commit 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") changed to lockless __linkwatch_sync_dev in ethtool_op_get_link. All paths except bonding are coming via locked ioctl. Add necessary locking to bonding. Reviewed-by: Hangbin Liu Reported-by: syzbot+48c14f61594bdfadb086@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=48c14f61594bdfadb086 Fixes: 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250410161117.3519250-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 950d8e4d86f8..8ea183da8d53 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -850,8 +850,9 @@ static int bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting) { const struct net_device_ops *slave_ops = slave_dev->netdev_ops; - struct ifreq ifr; struct mii_ioctl_data *mii; + struct ifreq ifr; + int ret; if (!reporting && !netif_running(slave_dev)) return 0; @@ -860,9 +861,13 @@ static int bond_check_dev_link(struct bonding *bond, return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0; /* Try to get link status using Ethtool first. */ - if (slave_dev->ethtool_ops->get_link) - return slave_dev->ethtool_ops->get_link(slave_dev) ? - BMSR_LSTATUS : 0; + if (slave_dev->ethtool_ops->get_link) { + netdev_lock_ops(slave_dev); + ret = slave_dev->ethtool_ops->get_link(slave_dev); + netdev_unlock_ops(slave_dev); + + return ret ? BMSR_LSTATUS : 0; + } /* Ethtool can't be used, fallback to MII ioctls. */ if (slave_ops->ndo_eth_ioctl) { From 5b04080cd6028f0737bbbd0c5b462d226cff9052 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:21 +0800 Subject: [PATCH 35/81] net: hibmcge: fix incorrect pause frame statistics issue The driver supports pause frames, but does not pass pause frames based on rx pause enable configuration, resulting in incorrect pause frame statistics. like this: mz eno3 '01 80 c2 00 00 01 00 18 2d 04 00 9c 88 08 00 01 ff ff' \ -p 64 -c 100 ethtool -S enp132s0f2 | grep -v ": 0" NIC statistics: rx_octets_total_filt_cnt: 6800 rx_filt_pkt_cnt: 100 The rx pause frames are filtered by the MAC hardware. This patch configures pass pause frames based on the rx puase enable status to ensure that rx pause frames are not filtered. mz eno3 '01 80 c2 00 00 01 00 18 2d 04 00 9c 88 08 00 01 ff ff' \ -p 64 -c 100 ethtool --include-statistics -a enp132s0f2 Pause parameters for enp132s0f2: Autonegotiate: on RX: on TX: on RX negotiated: on TX negotiated: on Statistics: tx_pause_frames: 0 rx_pause_frames: 100 Fixes: 3a03763f3876 ("net: hibmcge: Add pauseparam supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 3 +++ drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 74a18033b444..7d3bbd3e2adc 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -242,6 +242,9 @@ void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en) HBG_REG_PAUSE_ENABLE_TX_B, tx_en); hbg_reg_write_field(priv, HBG_REG_PAUSE_ENABLE_ADDR, HBG_REG_PAUSE_ENABLE_RX_B, rx_en); + + hbg_reg_write_field(priv, HBG_REG_REC_FILT_CTRL_ADDR, + HBG_REG_REC_FILT_CTRL_PAUSE_FRM_PASS_B, rx_en); } void hbg_hw_get_pause_enable(struct hbg_priv *priv, u32 *tx_en, u32 *rx_en) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index cc2cc612770d..fd623cfd13de 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -68,6 +68,7 @@ #define HBG_REG_TRANSMIT_CTRL_AN_EN_B BIT(5) #define HBG_REG_REC_FILT_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x0064) #define HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B BIT(0) +#define HBG_REG_REC_FILT_CTRL_PAUSE_FRM_PASS_B BIT(4) #define HBG_REG_RX_OCTETS_TOTAL_OK_ADDR (HBG_REG_SGMII_BASE + 0x0080) #define HBG_REG_RX_OCTETS_BAD_ADDR (HBG_REG_SGMII_BASE + 0x0084) #define HBG_REG_RX_UC_PKTS_ADDR (HBG_REG_SGMII_BASE + 0x0088) From 9afaaa54e3eb9b64fc07c06741897800e98ac253 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:22 +0800 Subject: [PATCH 36/81] net: hibmcge: fix incorrect multicast filtering issue The driver does not support multicast filtering, the mask must be set to 0xFFFFFFFF. Otherwise, incorrect filtering occurs. This patch fixes this problem. Fixes: 37b367d60d0f ("net: hibmcge: Add unicast frame filter supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 4 ++++ drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 7d3bbd3e2adc..9b65eef62b3f 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -234,6 +234,10 @@ void hbg_hw_set_mac_filter_enable(struct hbg_priv *priv, u32 enable) { hbg_reg_write_field(priv, HBG_REG_REC_FILT_CTRL_ADDR, HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B, enable); + + /* only uc filter is supported, so set all bits of mc mask reg to 1 */ + hbg_reg_write64(priv, HBG_REG_STATION_ADDR_LOW_MSK_0, U64_MAX); + hbg_reg_write64(priv, HBG_REG_STATION_ADDR_LOW_MSK_1, U64_MAX); } void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index fd623cfd13de..a6e7f5e62b48 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -135,6 +135,8 @@ #define HBG_REG_STATION_ADDR_HIGH_4_ADDR (HBG_REG_SGMII_BASE + 0x0224) #define HBG_REG_STATION_ADDR_LOW_5_ADDR (HBG_REG_SGMII_BASE + 0x0228) #define HBG_REG_STATION_ADDR_HIGH_5_ADDR (HBG_REG_SGMII_BASE + 0x022C) +#define HBG_REG_STATION_ADDR_LOW_MSK_0 (HBG_REG_SGMII_BASE + 0x0230) +#define HBG_REG_STATION_ADDR_LOW_MSK_1 (HBG_REG_SGMII_BASE + 0x0238) /* PCU */ #define HBG_REG_TX_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0420) From 4ad3df755a96012f792c7fa2aa62317db3cba82b Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:23 +0800 Subject: [PATCH 37/81] net: hibmcge: fix the share of irq statistics among different network ports issue hbg_irqs is a global array which contains irq statistics. However, the irq statistics of different network ports point to the same global array. As a result, the statistics are incorrect. This patch allocates a statistics array for each network port to prevent the statistics of different network ports from affecting each other. irq statistics are removed from hbg_irq_info. Therefore, all data in hbg_irq_info remains unchanged. Therefore, the input parameter of some functions is changed to const. Fixes: 4d089035fa19 ("net: hibmcge: Add interrupt supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hibmcge/hbg_common.h | 8 ++++--- .../ethernet/hisilicon/hibmcge/hbg_debugfs.c | 4 ++-- .../ethernet/hisilicon/hibmcge/hbg_diagnose.c | 2 +- .../net/ethernet/hisilicon/hibmcge/hbg_irq.c | 24 ++++++++++++------- .../net/ethernet/hisilicon/hibmcge/hbg_main.c | 2 +- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index f8cdab62bf85..7725cb0c5c8a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -108,14 +108,16 @@ struct hbg_irq_info { bool re_enable; bool need_print; bool need_reset; - u64 count; - void (*irq_handle)(struct hbg_priv *priv, struct hbg_irq_info *info); + void (*irq_handle)(struct hbg_priv *priv, + const struct hbg_irq_info *info); }; struct hbg_vector { char name[HBG_VECTOR_NUM][32]; - struct hbg_irq_info *info_array; + + u64 *stats_array; + const struct hbg_irq_info *info_array; u32 info_array_len; }; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 5e0ba4d5b08d..9c09e4835990 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -61,7 +61,7 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused) { struct net_device *netdev = dev_get_drvdata(s->private); struct hbg_priv *priv = netdev_priv(netdev); - struct hbg_irq_info *info; + const struct hbg_irq_info *info; u32 i; for (i = 0; i < priv->vectors.info_array_len; i++) { @@ -73,7 +73,7 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused) info->mask)), str_true_false(info->need_reset), str_true_false(info->need_print), - info->count); + priv->vectors.stats_array[i]); } return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c index d61c03f34ff0..f23fb5920c3c 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_diagnose.c @@ -234,7 +234,7 @@ static u64 hbg_get_irq_stats(struct hbg_vector *vectors, u32 mask) for (i = 0; i < vectors->info_array_len; i++) if (vectors->info_array[i].mask == mask) - return vectors->info_array[i].count; + return vectors->stats_array[i]; return 0; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c index e79e9ab3e530..8af0bc4cca21 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c @@ -6,7 +6,7 @@ #include "hbg_hw.h" static void hbg_irq_handle_err(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { if (irq_info->need_print) dev_err(&priv->pdev->dev, @@ -17,30 +17,30 @@ static void hbg_irq_handle_err(struct hbg_priv *priv, } static void hbg_irq_handle_tx(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { napi_schedule(&priv->tx_ring.napi); } static void hbg_irq_handle_rx(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { napi_schedule(&priv->rx_ring.napi); } static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv, - struct hbg_irq_info *irq_info) + const struct hbg_irq_info *irq_info) { priv->stats.rx_fifo_less_empty_thrsld_cnt++; } #define HBG_IRQ_I(name, handle) \ - {#name, HBG_INT_MSK_##name##_B, false, false, false, 0, handle} + {#name, HBG_INT_MSK_##name##_B, false, false, false, handle} #define HBG_ERR_IRQ_I(name, need_print, ndde_reset) \ {#name, HBG_INT_MSK_##name##_B, true, need_print, \ - ndde_reset, 0, hbg_irq_handle_err} + ndde_reset, hbg_irq_handle_err} -static struct hbg_irq_info hbg_irqs[] = { +static const struct hbg_irq_info hbg_irqs[] = { HBG_IRQ_I(RX, hbg_irq_handle_rx), HBG_IRQ_I(TX, hbg_irq_handle_tx), HBG_ERR_IRQ_I(TX_PKT_CPL, true, true), @@ -64,7 +64,7 @@ static struct hbg_irq_info hbg_irqs[] = { static irqreturn_t hbg_irq_handle(int irq_num, void *p) { - struct hbg_irq_info *info; + const struct hbg_irq_info *info; struct hbg_priv *priv = p; u32 status; u32 i; @@ -79,7 +79,7 @@ static irqreturn_t hbg_irq_handle(int irq_num, void *p) hbg_hw_irq_enable(priv, info->mask, false); hbg_hw_irq_clear(priv, info->mask); - info->count++; + priv->vectors.stats_array[i]++; if (info->irq_handle) info->irq_handle(priv, info); @@ -132,6 +132,12 @@ int hbg_irq_init(struct hbg_priv *priv) irq_names_map[i]); } + vectors->stats_array = devm_kcalloc(&priv->pdev->dev, + ARRAY_SIZE(hbg_irqs), + sizeof(u64), GFP_KERNEL); + if (!vectors->stats_array) + return -ENOMEM; + vectors->info_array = hbg_irqs; vectors->info_array_len = ARRAY_SIZE(hbg_irqs); return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 2ac5454338e4..e5c961ad4b9b 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -21,7 +21,7 @@ static void hbg_all_irq_enable(struct hbg_priv *priv, bool enabled) { - struct hbg_irq_info *info; + const struct hbg_irq_info *info; u32 i; for (i = 0; i < priv->vectors.info_array_len; i++) { From 4e4ac53335de54bcb9d26842df0998cfd8fcaf90 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:24 +0800 Subject: [PATCH 38/81] net: hibmcge: fix wrong mtu log issue A dbg log is generated when the driver modifies the MTU, which is expected to trace the change of the MTU. However, the log is recorded after WRITE_ONCE(). At this time, netdev->mtu has been changed to the new value. As a result, netdev->mtu is the same as new_mtu. This patch modifies the log location and records logs before WRITE_ONCE(). Fixes: ff4edac6e9bd ("net: hibmcge: Implement some .ndo functions") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index e5c961ad4b9b..2e64dc1ab355 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -203,12 +203,12 @@ static int hbg_net_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) return -EBUSY; - hbg_hw_set_mtu(priv, new_mtu); - WRITE_ONCE(netdev->mtu, new_mtu); - dev_dbg(&priv->pdev->dev, "change mtu from %u to %u\n", netdev->mtu, new_mtu); + hbg_hw_set_mtu(priv, new_mtu); + WRITE_ONCE(netdev->mtu, new_mtu); + return 0; } From 1d6c3e06232e5f53458842915bbff28e8fc29244 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:25 +0800 Subject: [PATCH 39/81] net: hibmcge: fix the incorrect np_link fail state issue. In the debugfs file, the driver displays the np_link fail state based on the HBG_NIC_STATE_NP_LINK_FAIL. However, HBG_NIC_STATE_NP_LINK_FAIL is cleared in hbg_service_task() So, this value of np_link fail is always false. This patch directly reads the related register to display the real state. Fixes: e0306637e85d ("net: hibmcge: Add support for mac link exception handling feature") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-6-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 9c09e4835990..01ad82d2f5cc 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -106,6 +106,7 @@ static int hbg_dbg_nic_state(struct seq_file *s, void *unused) { struct net_device *netdev = dev_get_drvdata(s->private); struct hbg_priv *priv = netdev_priv(netdev); + bool np_link_fail; seq_printf(s, "event handling state: %s\n", state_str_true_false(priv, HBG_NIC_STATE_EVENT_HANDLING)); @@ -117,8 +118,10 @@ static int hbg_dbg_nic_state(struct seq_file *s, void *unused) reset_type_str[priv->reset_type]); seq_printf(s, "need reset state: %s\n", state_str_true_false(priv, HBG_NIC_STATE_NEED_RESET)); - seq_printf(s, "np_link fail state: %s\n", - state_str_true_false(priv, HBG_NIC_STATE_NP_LINK_FAIL)); + + np_link_fail = !hbg_reg_read_field(priv, HBG_REG_AN_NEG_STATE_ADDR, + HBG_REG_AN_NEG_STATE_NP_LINK_OK_B); + seq_printf(s, "np_link fail state: %s\n", str_true_false(np_link_fail)); return 0; } From ae6c1dce3244e31011ee65f89fc2484f3cf6cf85 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:26 +0800 Subject: [PATCH 40/81] net: hibmcge: fix not restore rx pause mac addr after reset issue The MAC hardware supports receiving two types of pause frames from link partner. One is a pause frame with a destination address of 01:80:C2:00:00:01. The other is a pause frame whose destination address is the address of the hibmcge driver. 01:80:C2:00:00:01 is supported by default. In .ndo_set_mac_address(), the hibmcge driver calls .hbg_hw_set_rx_pause_mac_addr() to set its mac address as the destination address of the rx puase frame. Therefore, pause frames with two types of MAC addresses can be received. Currently, the rx pause addr does not restored after reset. As a result, pause frames whose destination address is the hibmcge driver address cannot be correctly received. This patch restores the configuration by calling .hbg_hw_set_rx_pause_mac_addr() after reset is complete. Fixes: 3f5a61f6d504 ("net: hibmcge: Add reset supported in this module") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-7-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 4e8cb66f601c..a0bcfb5a713d 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -26,12 +26,15 @@ static void hbg_restore_mac_table(struct hbg_priv *priv) static void hbg_restore_user_def_settings(struct hbg_priv *priv) { + /* The index of host mac is always 0. */ + u64 rx_pause_addr = ether_addr_to_u64(priv->filter.mac_table[0].addr); struct ethtool_pauseparam *pause_param = &priv->user_def.pause_param; hbg_restore_mac_table(priv); hbg_hw_set_mtu(priv, priv->netdev->mtu); hbg_hw_set_pause_enable(priv, pause_param->tx_pause, pause_param->rx_pause); + hbg_hw_set_rx_pause_mac_addr(priv, rx_pause_addr); } int hbg_rebuild(struct hbg_priv *priv) From e1d0b52d87ca68a92f2f8693b8eb475795a9a73f Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 10 Apr 2025 10:13:27 +0800 Subject: [PATCH 41/81] net: hibmcge: fix multiple phy_stop() issue After detecting the np_link_fail exception, the driver attempts to fix the exception by using phy_stop() and phy_start() in the scheduled task. However, hbg_fix_np_link_fail() and .ndo_stop() may be concurrently executed. As a result, phy_stop() is executed twice, and the following Calltrace occurs: hibmcge 0000:84:00.2 enp132s0f2: Link is Down hibmcge 0000:84:00.2: failed to link between MAC and PHY, try to fix... ------------[ cut here ]------------ called from state HALTED WARNING: CPU: 71 PID: 23391 at drivers/net/phy/phy.c:1503 phy_stop... ... pc : phy_stop+0x138/0x180 lr : phy_stop+0x138/0x180 sp : ffff8000c76bbd40 x29: ffff8000c76bbd40 x28: 0000000000000000 x27: 0000000000000000 x26: ffff2020047358c0 x25: ffff202004735940 x24: ffff20200000e405 x23: ffff2020060e5178 x22: ffff2020060e4000 x21: ffff2020060e49c0 x20: ffff2020060e5170 x19: ffff20202538e000 x18: 0000000000000020 x17: 0000000000000000 x16: ffffcede02e28f40 x15: ffffffffffffffff x14: 0000000000000000 x13: 205d313933333254 x12: 5b5d393430303233 x11: ffffcede04555958 x10: ffffcede04495918 x9 : ffffcede0274fee0 x8 : 00000000000bffe8 x7 : c0000000ffff7fff x6 : 0000000000000001 x5 : 00000000002bffa8 x4 : 0000000000000000 x3 : 0000000000000000 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff20202e429480 Call trace: phy_stop+0x138/0x180 hbg_fix_np_link_fail+0x4c/0x90 [hibmcge] hbg_service_task+0xfc/0x148 [hibmcge] process_one_work+0x180/0x398 worker_thread+0x210/0x328 kthread+0xe0/0xf0 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- This patch adds the rtnl_lock to hbg_fix_np_link_fail() to ensure that other operations are not performed concurrently. In addition, np_link_fail exception can be fixed only when the PHY is link. Fixes: e0306637e85d ("net: hibmcge: Add support for mac link exception handling feature") Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250410021327.590362-8-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index f29a937ad087..42b0083c9193 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -2,6 +2,7 @@ // Copyright (c) 2024 Hisilicon Limited. #include +#include #include "hbg_common.h" #include "hbg_hw.h" #include "hbg_mdio.h" @@ -133,12 +134,17 @@ void hbg_fix_np_link_fail(struct hbg_priv *priv) { struct device *dev = &priv->pdev->dev; + rtnl_lock(); + if (priv->stats.np_link_fail_cnt >= HBG_NP_LINK_FAIL_RETRY_TIMES) { dev_err(dev, "failed to fix the MAC link status\n"); priv->stats.np_link_fail_cnt = 0; - return; + goto unlock; } + if (!priv->mac.phydev->link) + goto unlock; + priv->stats.np_link_fail_cnt++; dev_err(dev, "failed to link between MAC and PHY, try to fix...\n"); @@ -147,6 +153,9 @@ void hbg_fix_np_link_fail(struct hbg_priv *priv) */ hbg_phy_stop(priv); hbg_phy_start(priv); + +unlock: + rtnl_unlock(); } static void hbg_phy_adjust_link(struct net_device *netdev) From f0433eea468810aebd61d0b9d095e9acd6bea2ed Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 12 Apr 2025 16:30:11 -0700 Subject: [PATCH 42/81] net: don't mix device locking in dev_close_many() calls Lockdep found the following dependency: &dev_instance_lock_key#3 --> &rdev->wiphy.mtx --> &net->xdp.lock --> &xs->mutex --> &dev_instance_lock_key#3 The first dependency is the problem. wiphy mutex should be outside the instance locks. The problem happens in notifiers (as always) for CLOSE. We only hold the instance lock for ops locked devices during CLOSE, and WiFi netdevs are not ops locked. Unfortunately, when we dev_close_many() during netns dismantle we may be holding the instance lock of _another_ netdev when issuing a CLOSE for a WiFi device. Lockdep's "Possible unsafe locking scenario" only prints 3 locks and we have 4, plus I think we'd need 3 CPUs, like this: CPU0 CPU1 CPU2 ---- ---- ---- lock(&xs->mutex); lock(&dev_instance_lock_key#3); lock(&rdev->wiphy.mtx); lock(&net->xdp.lock); lock(&xs->mutex); lock(&rdev->wiphy.mtx); lock(&dev_instance_lock_key#3); Tho, I don't think that's possible as CPU1 and CPU2 would be under rtnl_lock. Even if we have per-netns rtnl_lock and wiphy can span network namespaces - CPU0 and CPU1 must be in the same netns to see dev_instance_lock, so CPU0 can't be installing a socket as CPU1 is tearing the netns down. Regardless, our expected lock ordering is that wiphy lock is taken before instance locks, so let's fix this. Go over the ops locked and non-locked devices separately. Note that calling dev_close_many() on an empty list is perfectly fine. All processing (including RCU syncs) are conditional on the list not being empty, already. Fixes: 7e4d784f5810 ("net: hold netdev instance lock during rtnetlink operations") Reported-by: syzbot+6f588c78bf765b62b450@syzkaller.appspotmail.com Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250412233011.309762-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 75e104322ad5..5fcbc66d865e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11932,15 +11932,24 @@ void unregister_netdevice_many_notify(struct list_head *head, BUG_ON(dev->reg_state != NETREG_REGISTERED); } - /* If device is running, close it first. */ + /* If device is running, close it first. Start with ops locked... */ list_for_each_entry(dev, head, unreg_list) { - list_add_tail(&dev->close_list, &close_head); - netdev_lock_ops(dev); + if (netdev_need_ops_lock(dev)) { + list_add_tail(&dev->close_list, &close_head); + netdev_lock(dev); + } + } + dev_close_many(&close_head, true); + /* ... now unlock them and go over the rest. */ + list_for_each_entry(dev, head, unreg_list) { + if (netdev_need_ops_lock(dev)) + netdev_unlock(dev); + else + list_add_tail(&dev->close_list, &close_head); } dev_close_many(&close_head, true); list_for_each_entry(dev, head, unreg_list) { - netdev_unlock_ops(dev); /* And unlink it from device chain. */ unlist_netdevice(dev); netdev_lock(dev); From 747fb8413aaa36e4c988d45c4fe20d4c2b0778cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 9 Apr 2025 07:55:41 -0700 Subject: [PATCH 43/81] netlink: specs: ovs_vport: align with C codegen capabilities We started generating C code for OvS a while back, but actually C codegen only supports fixed headers specified at the family level right now (schema also allows specifying them per op). ovs_flow and ovs_datapath already specify the fixed header at the family level but ovs_vport does it per op. Move the property, all ops use the same header. This ensures YNL C sees the correct hdr_len: const struct ynl_family ynl_ovs_vport_family = { .name = "ovs_vport", - .hdr_len = sizeof(struct genlmsghdr), + .hdr_len = sizeof(struct genlmsghdr) + sizeof(struct ovs_header), }; Fixes: 7c59c9c8f202 ("tools: ynl: generate code for ovs families") Link: https://patch.msgid.link/20250409145541.580674-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ovs_vport.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/netlink/specs/ovs_vport.yaml b/Documentation/netlink/specs/ovs_vport.yaml index 86ba9ac2a521..b538bb99ee9b 100644 --- a/Documentation/netlink/specs/ovs_vport.yaml +++ b/Documentation/netlink/specs/ovs_vport.yaml @@ -123,12 +123,12 @@ attribute-sets: operations: name-prefix: ovs-vport-cmd- + fixed-header: ovs-header list: - name: new doc: Create a new OVS vport attribute-set: vport - fixed-header: ovs-header do: request: attributes: @@ -141,7 +141,6 @@ operations: name: del doc: Delete existing OVS vport from a data path attribute-set: vport - fixed-header: ovs-header do: request: attributes: @@ -152,7 +151,6 @@ operations: name: get doc: Get / dump OVS vport configuration and state attribute-set: vport - fixed-header: ovs-header do: &vport-get-op request: attributes: From 65d91192aa66f05710cfddf6a14b5a25ee554dba Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 12 Apr 2025 12:40:18 +0200 Subject: [PATCH 44/81] net: openvswitch: fix nested key length validation in the set() action It's not safe to access nla_len(ovs_key) if the data is smaller than the netlink header. Check that the attribute is OK first. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Reported-by: syzbot+b07a9da40df1576b8048@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=b07a9da40df1576b8048 Tested-by: syzbot+b07a9da40df1576b8048@syzkaller.appspotmail.com Signed-off-by: Ilya Maximets Reviewed-by: Eelco Chaudron Acked-by: Aaron Conole Link: https://patch.msgid.link/20250412104052.2073688-1-i.maximets@ovn.org Signed-off-by: Jakub Kicinski --- net/openvswitch/flow_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 95e0dd14dc1a..518be23e48ea 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2876,7 +2876,8 @@ static int validate_set(const struct nlattr *a, size_t key_len; /* There can be only one key in a action */ - if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) + if (!nla_ok(ovs_key, nla_len(a)) || + nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; key_len = nla_len(ovs_key); From 8b1879491472c145c58c3cbbaf0e05ea93ee5ddf Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 18 Mar 2025 11:21:41 +0100 Subject: [PATCH 45/81] can: fix missing decrement of j1939_proto.inuse_idx Like other protocols on top of AF_CAN family, also j1939_proto.inuse_idx needs to be decremented on socket dismantle. Fixes: 6bffe88452db ("can: add protocol counter for AF_CAN sockets") Reported-by: Oliver Hartkopp Closes: https://lore.kernel.org/linux-can/7e35b13f-bbc4-491e-9081-fb939e1b8df0@hartkopp.net/ Signed-off-by: Davide Caratti Acked-by: Oleksij Rempel Link: https://patch.msgid.link/09ce71f281b9e27d1e3d1104430bf3fceb8c7321.1742292636.git.dcaratti@redhat.com Signed-off-by: Marc Kleine-Budde --- net/can/j1939/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 17226b2341d0..6fefe7a68761 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -655,6 +655,7 @@ static int j1939_sk_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); sock_put(sk); return 0; From 6315d93541f8a5f77c5ef5c4f25233e66d189603 Mon Sep 17 00:00:00 2001 From: Weizhao Ouyang Date: Mon, 24 Mar 2025 19:44:16 +0800 Subject: [PATCH 46/81] can: rockchip_canfd: fix broken quirks checks First get the devtype_data then check quirks. Fixes: bbdffb341498 ("can: rockchip_canfd: add quirk for broken CAN-FD support") Signed-off-by: Weizhao Ouyang Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20250324114416.10160-1-o451686892@gmail.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/rockchip_canfd-core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index 46201c126703..7107a37da36c 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -902,15 +902,16 @@ static int rkcanfd_probe(struct platform_device *pdev) priv->can.data_bittiming_const = &rkcanfd_data_bittiming_const; priv->can.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_BERR_REPORTING; - if (!(priv->devtype_data.quirks & RKCANFD_QUIRK_CANFD_BROKEN)) - priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; priv->can.do_set_mode = rkcanfd_set_mode; priv->can.do_get_berr_counter = rkcanfd_get_berr_counter; priv->ndev = ndev; match = device_get_match_data(&pdev->dev); - if (match) + if (match) { priv->devtype_data = *(struct rkcanfd_devtype_data *)match; + if (!(priv->devtype_data.quirks & RKCANFD_QUIRK_CANFD_BROKEN)) + priv->can.ctrlmode_supported |= CAN_CTRLMODE_FD; + } err = can_rx_offload_add_manual(ndev, &priv->offload, RKCANFD_NAPI_WEIGHT); From 88fa80021b77732bc98f73fb69d69c7cc37b9f0d Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Sat, 12 Apr 2025 21:19:24 +0530 Subject: [PATCH 47/81] net: ngbe: fix memory leak in ngbe_probe() error path When ngbe_sw_init() is called, memory is allocated for wx->rss_key in wx_init_rss_key(). However, in ngbe_probe() function, the subsequent error paths after ngbe_sw_init() don't free the rss_key. Fix that by freeing it in error path along with wx->mac_table. Also change the label to which execution jumps when ngbe_sw_init() fails, because otherwise, it could lead to a double free for rss_key, when the mac_table allocation fails in wx_sw_init(). Fixes: 02338c484ab6 ("net: ngbe: Initialize sw info and register netdev") Signed-off-by: Abdun Nihaal Reviewed-by: Kory Maincent Reviewed-by: Jiawen Wu Link: https://patch.msgid.link/20250412154927.25908-1-abdun.nihaal@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index a6159214ec0a..91b3055a5a9f 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -625,7 +625,7 @@ static int ngbe_probe(struct pci_dev *pdev, /* setup the private structure */ err = ngbe_sw_init(wx); if (err) - goto err_free_mac_table; + goto err_pci_release_regions; /* check if flash load is done after hw power up */ err = wx_check_flash_load(wx, NGBE_SPI_ILDR_STATUS_PERST); @@ -719,6 +719,7 @@ static int ngbe_probe(struct pci_dev *pdev, err_clear_interrupt_scheme: wx_clear_interrupt_scheme(wx); err_free_mac_table: + kfree(wx->rss_key); kfree(wx->mac_table); err_pci_release_regions: pci_release_selected_regions(pdev, From 688abe1027d00b7d4b2ce2d8764a2ae5ec6d250b Mon Sep 17 00:00:00 2001 From: Chenyuan Yang Date: Sat, 12 Apr 2025 13:33:27 -0500 Subject: [PATCH 48/81] octeontx2-pf: handle otx2_mbox_get_rsp errors Adding error pointer check after calling otx2_mbox_get_rsp(). This is similar to the commit bd3110bc102a ("octeontx2-pf: handle otx2_mbox_get_rsp errors in otx2_flows.c"). Signed-off-by: Chenyuan Yang Fixes: 6c40ca957fe5 ("octeontx2-pf: Adds TC offload support") Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20250412183327.3550970-1-chenyuan0y@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/rep.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 04e08e06f30f..7153a71dfc86 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -67,6 +67,8 @@ static int rvu_rep_mcam_flow_init(struct rep_dev *rep) rsp = (struct npc_mcam_alloc_entry_rsp *)otx2_mbox_get_rsp (&priv->mbox.mbox, 0, &req->hdr); + if (IS_ERR(rsp)) + goto exit; for (ent = 0; ent < rsp->count; ent++) rep->flow_cfg->flow_ent[ent + allocated] = rsp->entry_list[ent]; From 903d2b9f9efc5b3339d74015fcfc0d9fff276c4c Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 14 Apr 2025 10:39:42 +0200 Subject: [PATCH 49/81] net: ethernet: ti: am65-cpsw: fix port_np reference counting A reference to the device tree node is stored in a private struct, thus the reference count has to be incremented. Also, decrement the count on device removal and in the error path. Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver") Signed-off-by: Michael Walle Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250414083942.4015060-1-mwalle@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-nuss.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index c9fd34787c99..1e6d2335293d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2666,7 +2666,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common) of_property_read_bool(port_np, "ti,mac-only"); /* get phy/link info */ - port->slave.port_np = port_np; + port->slave.port_np = of_node_get(port_np); ret = of_get_phy_mode(port_np, &port->slave.phy_if); if (ret) { dev_err(dev, "%pOF read phy-mode err %d\n", @@ -2720,6 +2720,17 @@ static void am65_cpsw_nuss_phylink_cleanup(struct am65_cpsw_common *common) } } +static void am65_cpsw_remove_dt(struct am65_cpsw_common *common) +{ + struct am65_cpsw_port *port; + int i; + + for (i = 0; i < common->port_num; i++) { + port = &common->ports[i]; + of_node_put(port->slave.port_np); + } +} + static int am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx) { @@ -3622,6 +3633,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) am65_cpsw_nuss_cleanup_ndev(common); am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); + am65_cpsw_remove_dt(common); err_of_clear: if (common->mdio_dev) of_platform_device_destroy(common->mdio_dev, NULL); @@ -3661,6 +3673,7 @@ static void am65_cpsw_nuss_remove(struct platform_device *pdev) am65_cpsw_nuss_phylink_cleanup(common); am65_cpts_release(common->cpts); am65_cpsw_disable_serdes_phy(common); + am65_cpsw_remove_dt(common); if (common->mdio_dev) of_platform_device_destroy(common->mdio_dev, NULL); From 12f2d033fae957d84c2c0ce604d2a077e61fa2c0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 07:32:10 -0700 Subject: [PATCH 50/81] eth: bnxt: fix missing ring index trim on error path Commit under Fixes converted tx_prod to be free running but missed masking it on the Tx error path. This crashes on error conditions, for example when DMA mapping fails. Fixes: 6d1add95536b ("bnxt_en: Modify TX ring indexing logic.") Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250414143210.458625-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8725e1e13908..c8e3468eee61 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -787,7 +787,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); tx_kick_pending: if (BNXT_TX_PTP_IS_SET(lflags)) { - txr->tx_buf_ring[txr->tx_prod].is_ts_pkt = 0; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].is_ts_pkt = 0; atomic64_inc(&bp->ptp_cfg->stats.ts_err); if (!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) /* set SKB to err so PTP worker will clean up */ @@ -795,7 +795,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); - txr->tx_buf_ring[txr->tx_prod].skb = NULL; + txr->tx_buf_ring[RING_TX(bp, txr->tx_prod)].skb = NULL; dev_core_stats_tx_dropped_inc(dev); return NETDEV_TX_OK; } From 2d300ce0b783ba74420052ed3a12010ac93bdb08 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Apr 2025 20:20:21 +0300 Subject: [PATCH 51/81] net: fib_rules: Fix iif / oif matching on L3 master device Before commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") it was possible to use FIB rules to match on a L3 domain. This was done by having a FIB rule match on iif / oif being a L3 master device. It worked because prior to the FIB rule lookup the iif / oif fields in the flow structure were reset to the index of the L3 master device to which the input / output device was enslaved to. The above scheme made it impossible to match on the original input / output device. Therefore, cited commit stopped overwriting the iif / oif fields in the flow structure and instead stored the index of the enslaving L3 master device in a new field ('flowi_l3mdev') in the flow structure. While the change enabled new use cases, it broke the original use case of matching on a L3 domain. Fix this by interpreting the iif / oif matching on a L3 master device as a match against the L3 domain. In other words, if the iif / oif in the FIB rule points to a L3 master device, compare the provided index against 'flowi_l3mdev' rather than 'flowi_{i,o}if'. Before cited commit, a FIB rule that matched on 'iif vrf1' would only match incoming traffic from devices enslaved to 'vrf1'. With the proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would also match traffic originating from a socket bound to 'vrf1'. Avoid that by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that indicates if the L3 domain was derived from the output interface or the input interface (when not set) and take this flag into account when evaluating the FIB rule against the flow structure. Avoid unnecessary checks in the data path by detecting that a rule matches on a L3 master device when the rule is installed and marking it as such. Tested using the following script [1]. Output before 40867d74c374 (v5.4.291): default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link Output after 40867d74c374: default dev dummy1 table 300 scope link default dev dummy1 table 300 scope link Output with this patch: default dev dummy1 table 100 scope link default dev dummy1 table 200 scope link [1] #!/bin/bash ip link add name vrf1 up type vrf table 10 ip link add name dummy1 up master vrf1 type dummy sysctl -wq net.ipv4.conf.all.forwarding=1 sysctl -wq net.ipv4.conf.all.rp_filter=0 ip route add table 100 default dev dummy1 ip route add table 200 default dev dummy1 ip route add table 300 default dev dummy1 ip rule add prio 0 oif vrf1 table 100 ip rule add prio 1 iif vrf1 table 200 ip rule add prio 2 table 300 ip route get 192.0.2.1 oif dummy1 fibmatch ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") Reported-by: hanhuihui Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/ Signed-off-by: Ido Schimmel Acked-by: David Ahern Link: https://patch.msgid.link/20250414172022.242991-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/fib_rules.h | 2 ++ include/net/flow.h | 1 + include/net/l3mdev.h | 27 +++++++++++++++++++++++ net/core/fib_rules.c | 48 ++++++++++++++++++++++++++++++++++------- net/l3mdev/l3mdev.c | 4 +++- 5 files changed, 73 insertions(+), 9 deletions(-) diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 5927910ec06e..6e68e359ad18 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -45,6 +45,8 @@ struct fib_rule { struct fib_rule_port_range dport_range; u16 sport_mask; u16 dport_mask; + u8 iif_is_l3_master; + u8 oif_is_l3_master; struct rcu_head rcu; }; diff --git a/include/net/flow.h b/include/net/flow.h index 335bbc52171c..2a3f0c42f092 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -38,6 +38,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 +#define FLOWI_FLAG_L3MDEV_OIF 0x04 __u32 flowic_secid; kuid_t flowic_uid; __u32 flowic_multipath_hash; diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index f7fe796e8429..1eb8dad18f7e 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -59,6 +59,20 @@ int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && + fl->flowi_l3mdev == iifindex; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && + fl->flowi_l3mdev == oifindex; +} + void l3mdev_update_flow(struct net *net, struct flowi *fl); int l3mdev_master_ifindex_rcu(const struct net_device *dev); @@ -327,6 +341,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, { return 1; } + +static inline +bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) +{ + return false; +} + +static inline +bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) +{ + return false; +} + static inline void l3mdev_update_flow(struct net *net, struct flowi *fl) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 4bc64d912a1c..7af302080a66 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -257,6 +257,24 @@ static int nla_put_port_range(struct sk_buff *skb, int attrtype, return nla_put(skb, attrtype, sizeof(*range), range); } +static bool fib_rule_iif_match(const struct fib_rule *rule, int iifindex, + const struct flowi *fl) +{ + u8 iif_is_l3_master = READ_ONCE(rule->iif_is_l3_master); + + return iif_is_l3_master ? l3mdev_fib_rule_iif_match(fl, iifindex) : + fl->flowi_iif == iifindex; +} + +static bool fib_rule_oif_match(const struct fib_rule *rule, int oifindex, + const struct flowi *fl) +{ + u8 oif_is_l3_master = READ_ONCE(rule->oif_is_l3_master); + + return oif_is_l3_master ? l3mdev_fib_rule_oif_match(fl, oifindex) : + fl->flowi_oif == oifindex; +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) @@ -264,11 +282,11 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, int iifindex, oifindex, ret = 0; iifindex = READ_ONCE(rule->iifindex); - if (iifindex && (iifindex != fl->flowi_iif)) + if (iifindex && !fib_rule_iif_match(rule, iifindex, fl)) goto out; oifindex = READ_ONCE(rule->oifindex); - if (oifindex && (oifindex != fl->flowi_oif)) + if (oifindex && !fib_rule_oif_match(rule, oifindex, fl)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) @@ -736,16 +754,20 @@ static int fib_nl2rule_rtnl(struct fib_rule *nlrule, struct net_device *dev; dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname); - if (dev) + if (dev) { nlrule->iifindex = dev->ifindex; + nlrule->iif_is_l3_master = netif_is_l3_master(dev); + } } if (tb[FRA_OIFNAME]) { struct net_device *dev; dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname); - if (dev) + if (dev) { nlrule->oifindex = dev->ifindex; + nlrule->oif_is_l3_master = netif_is_l3_master(dev); + } } return 0; @@ -1336,11 +1358,17 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && - strcmp(dev->name, rule->iifname) == 0) + strcmp(dev->name, rule->iifname) == 0) { WRITE_ONCE(rule->iifindex, dev->ifindex); + WRITE_ONCE(rule->iif_is_l3_master, + netif_is_l3_master(dev)); + } if (rule->oifindex == -1 && - strcmp(dev->name, rule->oifname) == 0) + strcmp(dev->name, rule->oifname) == 0) { WRITE_ONCE(rule->oifindex, dev->ifindex); + WRITE_ONCE(rule->oif_is_l3_master, + netif_is_l3_master(dev)); + } } } @@ -1349,10 +1377,14 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) struct fib_rule *rule; list_for_each_entry(rule, rules, list) { - if (rule->iifindex == dev->ifindex) + if (rule->iifindex == dev->ifindex) { WRITE_ONCE(rule->iifindex, -1); - if (rule->oifindex == dev->ifindex) + WRITE_ONCE(rule->iif_is_l3_master, false); + } + if (rule->oifindex == dev->ifindex) { WRITE_ONCE(rule->oifindex, -1); + WRITE_ONCE(rule->oif_is_l3_master, false); + } } } diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index ca10916340b0..5432a5f2dfc8 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -277,8 +277,10 @@ void l3mdev_update_flow(struct net *net, struct flowi *fl) if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { - if (!fl->flowi_l3mdev) + if (!fl->flowi_l3mdev) { fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); + fl->flowi_flags |= FLOWI_FLAG_L3MDEV_OIF; + } /* oif set to L3mdev directs lookup to its table; * reset to avoid oif match in fib_lookup From f9c87590ed6ab78b69042ea31b7b8e37302d53f3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 14 Apr 2025 20:20:22 +0300 Subject: [PATCH 52/81] selftests: fib_rule_tests: Add VRF match tests Add tests for FIB rules that match on iif / oif being a VRF device. Test both good and bad flows. With previous patch ("net: fib_rules: Fix iif / oif matching on L3 master device"): # ./fib_rule_tests.sh [...] Tests passed: 328 Tests failed: 0 Without it: # ./fib_rule_tests.sh [...] Tests passed: 324 Tests failed: 4 Signed-off-by: Ido Schimmel Acked-by: David Ahern Link: https://patch.msgid.link/20250414172022.242991-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fib_rule_tests.sh | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index b866bab1d92a..c7cea556b416 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -359,6 +359,23 @@ fib_rule6_test() "$getnomatch" "iif flowlabel masked redirect to table" \ "iif flowlabel masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule6_vrf_test() @@ -635,6 +652,23 @@ fib_rule4_test() "$getnomatch" "iif dscp masked redirect to table" \ "iif dscp masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule4_vrf_test() From 10a77965760c6e2b3eef483be33ae407004df894 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 14 Apr 2025 20:05:37 +0200 Subject: [PATCH 53/81] batman-adv: Fix double-hold of meshif when getting enabled It was originally meant to replace the dev_hold with netdev_hold. But this was missed in batadv_hardif_enable_interface(). As result, there was an imbalance and a hang when trying to remove the mesh-interface with (previously) active hard-interfaces: unregister_netdevice: waiting for batadv0 to become free. Usage count = 3 Fixes: 00b35530811f ("batman-adv: adopt netdev_hold() / netdev_put()") Suggested-by: Eric Dumazet Reported-by: syzbot+ff3aa851d46ab82953a3@syzkaller.appspotmail.com Reported-by: syzbot+4036165fc595a74b09b2@syzkaller.appspotmail.com Reported-by: syzbot+c35d73ce910d86c0026e@syzkaller.appspotmail.com Reported-by: syzbot+48c14f61594bdfadb086@syzkaller.appspotmail.com Reported-by: syzbot+f37372d86207b3bb2941@syzkaller.appspotmail.com Signed-off-by: Sven Eckelmann Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20250414-double_hold_fix-v5-1-10e056324cde@narfation.org Signed-off-by: Jakub Kicinski --- net/batman-adv/hard-interface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index f145f9662653..7cd4bdcee439 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -725,7 +725,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, kref_get(&hard_iface->refcount); - dev_hold(mesh_iface); netdev_hold(mesh_iface, &hard_iface->meshif_dev_tracker, GFP_ATOMIC); hard_iface->mesh_iface = mesh_iface; bat_priv = netdev_priv(hard_iface->mesh_iface); From e2e49e214145a8f6ece6ecd52fec63ebc2b27ce9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 14 Apr 2025 11:08:15 +0200 Subject: [PATCH 54/81] Bluetooth: l2cap: Process valid commands in too long frame MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required for passing PTS test cases: - L2CAP/COS/CED/BI-14-C Multiple Signaling Command in one PDU, Data Truncated, BR/EDR, Connection Request - L2CAP/COS/CED/BI-15-C Multiple Signaling Command in one PDU, Data Truncated, BR/EDR, Disconnection Request The test procedure defined in L2CAP.TS.p39 for both tests is: 1. The Lower Tester sends a C-frame to the IUT with PDU Length set to 8 and Channel ID set to the correct signaling channel for the logical link. The Information payload contains one L2CAP_ECHO_REQ packet with Data Length set to 0 with 0 octets of echo data and one command packet and Data Length set as specified in Table 4.6 and the correct command data. 2. The IUT sends an L2CAP_ECHO_RSP PDU to the Lower Tester. 3. Perform alternative 3A, 3B, 3C, or 3D depending on the IUT’s response. Alternative 3A (IUT terminates the link): 3A.1 The IUT terminates the link. 3A.2 The test ends with a Pass verdict. Alternative 3B (IUT discards the frame): 3B.1 The IUT does not send a reply to the Lower Tester. Alternative 3C (IUT rejects PDU): 3C.1 The IUT sends an L2CAP_COMMAND_REJECT_RSP PDU to the Lower Tester. Alternative 3D (Any other IUT response): 3D.1 The Upper Tester issues a warning and the test ends. 4. The Lower Tester sends a C-frame to the IUT with PDU Length set to 4 and Channel ID set to the correct signaling channel for the logical link. The Information payload contains Data Length set to 0 with an L2CAP_ECHO_REQ packet with 0 octets of echo data. 5. The IUT sends an L2CAP_ECHO_RSP PDU to the Lower Tester. With expected outcome: In Steps 2 and 5, the IUT responds with an L2CAP_ECHO_RSP. In Step 3A.1, the IUT terminates the link. In Step 3B.1, the IUT does not send a reply to the Lower Tester. In Step 3C.1, the IUT rejects the PDU. In Step 3D.1, the IUT sends any valid response. Currently PTS fails with the following logs: Failed to receive ECHO RESPONSE. And HCI logs: > ACL Data RX: Handle 11 flags 0x02 dlen 20 L2CAP: Information Response (0x0b) ident 2 len 12 Type: Fixed channels supported (0x0003) Result: Success (0x0000) Channels: 0x000000000000002e L2CAP Signaling (BR/EDR) Connectionless reception AMP Manager Protocol L2CAP Signaling (LE) > ACL Data RX: Handle 11 flags 0x02 dlen 13 frame too long 08 01 00 00 08 02 01 00 aa ......... Cc: stable@vger.kernel.org Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index f1c4b8bd7a8b..5ca7ac43c58d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -7539,8 +7539,24 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (skb->len > len) { BT_ERR("Frame is too long (len %u, expected len %d)", skb->len, len); + /* PTS test cases L2CAP/COS/CED/BI-14-C and BI-15-C + * (Multiple Signaling Command in one PDU, Data + * Truncated, BR/EDR) send a C-frame to the IUT with + * PDU Length set to 8 and Channel ID set to the + * correct signaling channel for the logical link. + * The Information payload contains one L2CAP_ECHO_REQ + * packet with Data Length set to 0 with 0 octets of + * echo data and one invalid command packet due to + * data truncated in PDU but present in HCI packet. + * + * Shorter the socket buffer to the PDU length to + * allow to process valid commands from the PDU before + * setting the socket unreliable. + */ + skb->len = len; + l2cap_recv_frame(conn, skb); l2cap_conn_unreliable(conn, ECOMM); - goto drop; + goto unlock; } /* Append fragment into frame (with header) */ From 875db86e1ec75fe633f1e85ed2f92c731cdbf760 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 09:15:19 -0700 Subject: [PATCH 55/81] Bluetooth: vhci: Avoid needless snprintf() calls Avoid double-copying of string literals. Use a "const char *" for each string instead of copying from .rodata into stack and then into the skb. We can go directly from .rodata to the skb. This also works around a Clang bug (that has since been fixed[1]). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401250927.1poZERd6-lkp@intel.com/ Fixes: ab4e4380d4e1 ("Bluetooth: Add vhci devcoredump support") Link: https://github.com/llvm/llvm-project/commit/ea2e66aa8b6e363b89df66dc44275a0d7ecd70ce [1] Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Nathan Chancellor Reviewed-by: Josh Poimboeuf Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/hci_vhci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index a51935d37e5d..59f4d7bdffdc 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -289,18 +289,18 @@ static void vhci_coredump(struct hci_dev *hdev) static void vhci_coredump_hdr(struct hci_dev *hdev, struct sk_buff *skb) { - char buf[80]; + const char *buf; - snprintf(buf, sizeof(buf), "Controller Name: vhci_ctrl\n"); + buf = "Controller Name: vhci_ctrl\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Firmware Version: vhci_fw\n"); + buf = "Firmware Version: vhci_fw\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Driver: vhci_drv\n"); + buf = "Driver: vhci_drv\n"; skb_put_data(skb, buf, strlen(buf)); - snprintf(buf, sizeof(buf), "Vendor: vhci\n"); + buf = "Vendor: vhci\n"; skb_put_data(skb, buf, strlen(buf)); } From 00ffb3724ce743578163f5ade2884374554ca021 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Mon, 14 Apr 2025 22:36:46 +0530 Subject: [PATCH 56/81] cxgb4: fix memory leak in cxgb4_init_ethtool_filters() error path In the for loop used to allocate the loc_array and bmap for each port, a memory leak is possible when the allocation for loc_array succeeds, but the allocation for bmap fails. This is because when the control flow goes to the label free_eth_finfo, only the allocations starting from (i-1)th iteration are freed. Fix that by freeing the loc_array in the bmap allocation error path. Fixes: d915c299f1da ("cxgb4: add skeleton for ethtool n-tuple filters") Signed-off-by: Abdun Nihaal Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414170649.89156-1-abdun.nihaal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 7f3f5afa864f..1546c3db08f0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -2270,6 +2270,7 @@ int cxgb4_init_ethtool_filters(struct adapter *adap) eth_filter->port[i].bmap = bitmap_zalloc(nentries, GFP_KERNEL); if (!eth_filter->port[i].bmap) { ret = -ENOMEM; + kvfree(eth_filter->port[i].loc_array); goto free_eth_finfo; } } From 4d07bbf2d45683841e578a2f255e4c174534bf38 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:44 -0700 Subject: [PATCH 57/81] tools: ynl-gen: don't declare loop iterator in place The codegen tries to follow the "old" C style and declare loop iterators at the start of the block / function. Only nested request handling breaks this style, so adjust it. Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index a1427c537030..305f5696bc4f 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -654,10 +654,10 @@ def _attr_get(self, ri, var): def attr_put(self, ri, var): if self.attr['type'] in scalars: put_type = self.type - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + f"{self.enum_name}, &{var}->{self.c_name}[i])") else: @@ -1644,11 +1644,23 @@ def put_req_nested_prototype(ri, struct, suffix=';'): def put_req_nested(ri, struct): + local_vars = [] + init_lines = [] + + local_vars.append('struct nlattr *nest;') + init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);") + + for _, arg in struct.member_list(): + if arg.presence_type() == 'count': + local_vars.append('unsigned int i;') + break + put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() - ri.cw.write_func_lvar('struct nlattr *nest;') + ri.cw.write_func_lvar(local_vars) - ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);") + for line in init_lines: + ri.cw.p(line) for _, arg in struct.member_list(): arg.attr_put(ri, "obj") @@ -1850,6 +1862,11 @@ def print_req(ri): local_vars += ['size_t hdr_len;', 'void *hdr;'] + for _, attr in ri.struct["request"].member_list(): + if attr.presence_type() == 'count': + local_vars += ['unsigned int i;'] + break + print_prototype(ri, direction, terminate=False) ri.cw.block_start() ri.cw.write_func_lvar(local_vars) From dfa464b4a603984d648a9beb9bce72df5858c1e2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:45 -0700 Subject: [PATCH 58/81] tools: ynl-gen: move local vars after the opening bracket The "function writing helper" tries to put local variables between prototype and the opening bracket. Clearly wrong, but up until now nothing actually uses it to write local vars so it wasn't noticed. Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 305f5696bc4f..662a925bd9e1 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -1399,9 +1399,9 @@ def write_func_lvar(self, local_vars): def write_func(self, qual_ret, name, body, args=None, local_vars=None): self.write_func_prot(qual_ret=qual_ret, name=name, args=args) + self.block_start() self.write_func_lvar(local_vars=local_vars) - self.block_start() for line in body: self.p(line) self.block_end() From ce6cb8113c842b94e77364b247c4f85c7b34e0c2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:46 -0700 Subject: [PATCH 59/81] tools: ynl-gen: individually free previous values on double set When user calls request_attrA_set() multiple times (for the same attribute), and attrA is of type which allocates memory - we try to free the previously associated values. For array types (including multi-attr) we have only freed the array, but the array may have contained pointers. Refactor the code generation for free attr and reuse the generated lines in setters to flush out the previous state. Since setters are static inlines in the header we need to add forward declarations for the free helpers of pure nested structs. Track which types get used by arrays and include the right forwad declarations. At least ethtool string set and bit set would not be freed without this. Tho, admittedly, overriding already set attribute twice is likely a very very rare thing to do. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 62 +++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 662a925bd9e1..2d856ccc88f4 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -162,9 +162,15 @@ def _complex_member_type(self, ri): def free_needs_iter(self): return False - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): if self.is_multi_val() or self.presence_type() == 'len': - ri.cw.p(f'free({var}->{ref}{self.c_name});') + return [f'free({var}->{ref}{self.c_name});'] + return [] + + def free(self, ri, var, ref): + lines = self._free_lines(ri, var, ref) + for line in lines: + ri.cw.p(line) def arg_member(self, ri): member = self._complex_member_type(ri) @@ -263,6 +269,10 @@ def setter(self, ri, space, direction, deref=False, ref=None): var = "req" member = f"{var}->{'.'.join(ref)}" + local_vars = [] + if self.free_needs_iter(): + local_vars += ['unsigned int i;'] + code = [] presence = '' for i in range(0, len(ref)): @@ -272,6 +282,10 @@ def setter(self, ri, space, direction, deref=False, ref=None): if i == len(ref) - 1 and self.presence_type() != 'bit': continue code.append(presence + ' = 1;') + ref_path = '.'.join(ref[:-1]) + if ref_path: + ref_path += '.' + code += self._free_lines(ri, var, ref_path) code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" @@ -279,7 +293,8 @@ def setter(self, ri, space, direction, deref=False, ref=None): alloc = bool([x for x in code if 'alloc(' in x]) if free and not alloc: func_name = '__' + func_name - ri.cw.write_func('static inline void', func_name, body=code, + ri.cw.write_func('static inline void', func_name, local_vars=local_vars, + body=code, args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri)) @@ -482,8 +497,7 @@ def _attr_get(self, ri, var): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = strlen({self.c_name});", + return [f"{presence}_len = strlen({self.c_name});", f"{member} = malloc({presence}_len + 1);", f'memcpy({member}, {self.c_name}, {presence}_len);', f'{member}[{presence}_len] = 0;'] @@ -536,8 +550,7 @@ def _attr_get(self, ri, var): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = len;", + return [f"{presence}_len = len;", f"{member} = malloc({presence}_len);", f'memcpy({member}, {self.c_name}, {presence}_len);'] @@ -574,12 +587,14 @@ def is_recursive(self): def _complex_member_type(self, ri): return self.nested_struct_type - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] at = '&' if self.is_recursive_for_op(ri): at = '' - ri.cw.p(f'if ({var}->{ref}{self.c_name})') - ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});') + lines += [f'if ({var}->{ref}{self.c_name})'] + lines += [f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});'] + return lines def _attr_typol(self): return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -632,15 +647,19 @@ def _complex_member_type(self, ri): def free_needs_iter(self): return 'type' not in self.attr or self.attr['type'] == 'nest' - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] if self.attr['type'] in scalars: - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [f"free({var}->{ref}{self.c_name});"] elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)") - ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);') - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [ + f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + f"free({var}->{ref}{self.c_name});", + ] else: raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet") + return lines def _attr_policy(self, policy): return self.base_type._attr_policy(policy) @@ -666,8 +685,7 @@ def attr_put(self, ri, var): def _setter_lines(self, ri, member, presence): # For multi-attr we have a count, not presence, hack up the presence presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name - return [f"free({member});", - f"{member} = {self.c_name};", + return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] @@ -755,6 +773,7 @@ def __init__(self, family, space_name, type_list=None, inherited=None): self.request = False self.reply = False self.recursive = False + self.in_multi_val = False # used by a MultiAttr or and legacy arrays self.attr_list = [] self.attrs = dict() @@ -1122,6 +1141,10 @@ def _load_nested_sets(self): if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True + if spec.is_multi_val(): + child = self.pure_nested_structs.get(nested) + child.in_multi_val = True + self._sort_pure_types() # Propagate the request / reply / recursive @@ -1136,6 +1159,8 @@ def _load_nested_sets(self): struct.child_nests.update(child.child_nests) child.request |= struct.request child.reply |= struct.reply + if spec.is_multi_val(): + child.in_multi_val = True if attr_set in struct.child_nests: struct.recursive = True @@ -2958,6 +2983,9 @@ def main(): for attr_set, struct in parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) + if struct.request and struct.in_multi_val: + free_rsp_nested_prototype(ri) + cw.nl() for op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") From 57e7dedf2b8c72caa6f04b9e08b19e4f370562fa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:47 -0700 Subject: [PATCH 60/81] tools: ynl-gen: make sure we validate subtype of array-nest ArrayNest AKA indexed-array support currently skips inner type validation. We count the attributes and then we parse them, make sure we call validate, too. Otherwise buggy / unexpected kernel response may lead to crashes. Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 2d856ccc88f4..30c0a34b2784 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -714,8 +714,11 @@ def _attr_typol(self): def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] get_lines = [f'attr_{self.c_name} = attr;', - 'ynl_attr_for_each_nested(attr2, attr)', - f'\t{var}->n_{self.c_name}++;'] + 'ynl_attr_for_each_nested(attr2, attr) {', + '\tif (ynl_attr_validate(yarg, attr2))', + '\t\treturn YNL_PARSE_CB_ERROR;', + f'\t{var}->n_{self.c_name}++;', + '}'] return get_lines, None, local_vars From acf4da17deada7f8b120e051aa6c9cac40dbd83b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:48 -0700 Subject: [PATCH 61/81] netlink: specs: rt-link: add an attr layer around alt-ifname alt-ifname attr is directly placed in requests (as an alternative to ifname) but in responses its wrapped up in IFLA_PROP_LIST and only there is may be multi-attr. See rtnl_fill_prop_list(). Fixes: b2f63d904e72 ("doc/netlink: Add spec for rt link messages") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 31238455f8e9..200e9a7e5b11 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1113,11 +1113,10 @@ attribute-sets: - name: prop-list type: nest - nested-attributes: link-attrs + nested-attributes: prop-list-link-attrs - name: alt-ifname type: string - multi-attr: true - name: perm-address type: binary @@ -1163,6 +1162,13 @@ attribute-sets: - name: netns-immutable type: u8 + - + name: prop-list-link-attrs + subset-of: link-attrs + attributes: + - + name: alt-ifname + multi-attr: true - name: af-spec-attrs attributes: @@ -2453,7 +2459,6 @@ operations: - min-mtu - max-mtu - prop-list - - alt-ifname - perm-address - proto-down-reason - parent-dev-name From 540201c0ef7e8e7b169f68a238ade931a81a31a6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:49 -0700 Subject: [PATCH 62/81] netlink: specs: rtnetlink: attribute naming corrections Some attribute names diverge in very minor ways from the C names. These are most likely typos, and they prevent the C codegen from working. Fixes: bc515ed06652 ("netlink: specs: Add a spec for neighbor tables in rtnetlink") Fixes: b2f63d904e72 ("doc/netlink: Add spec for rt link messages") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 6 +++--- Documentation/netlink/specs/rt_neigh.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 200e9a7e5b11..03323d7f58dc 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -1591,7 +1591,7 @@ attribute-sets: name: nf-call-iptables type: u8 - - name: nf-call-ip6-tables + name: nf-call-ip6tables type: u8 - name: nf-call-arptables @@ -2083,7 +2083,7 @@ attribute-sets: name: id type: u16 - - name: flag + name: flags type: binary struct: ifla-vlan-flags - @@ -2171,7 +2171,7 @@ attribute-sets: type: binary struct: ifla-cacheinfo - - name: icmp6-stats + name: icmp6stats type: binary struct: ifla-icmp6-stats - diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt_neigh.yaml index e670b6dc07be..a1e137a16abd 100644 --- a/Documentation/netlink/specs/rt_neigh.yaml +++ b/Documentation/netlink/specs/rt_neigh.yaml @@ -189,7 +189,7 @@ attribute-sets: type: binary display-hint: ipv4 - - name: lladr + name: lladdr type: binary display-hint: mac - From beb3c5ad8829b52057f48a776a9d9558b98c157f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:50 -0700 Subject: [PATCH 63/81] netlink: specs: rt-link: adjust mctp attribute naming MCTP attribute naming is inconsistent. In C we have: IFLA_MCTP_NET, IFLA_MCTP_PHYS_BINDING, ^^^^ but in YAML: - mctp-net - phys-binding ^ no "mctp" It's unclear whether the "mctp" part of the name is supposed to be a prefix or part of attribute name. Make it a prefix, seems cleaner, even tho technically phys-binding was added later. Fixes: b2f63d904e72 ("doc/netlink: Add spec for rt link messages") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 03323d7f58dc..6b9d5ee87d93 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -2185,9 +2185,10 @@ attribute-sets: type: u32 - name: mctp-attrs + name-prefix: ifla-mctp- attributes: - - name: mctp-net + name: net type: u32 - name: phys-binding From e31f86ee4b9ccb844baf2131da8e5d4d6f23aa1d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 14 Apr 2025 14:18:51 -0700 Subject: [PATCH 64/81] netlink: specs: rt-neigh: prefix struct nfmsg members with ndm Attach ndm- to all members of struct nfmsg. We could possibly use name-prefix just for C, but I don't think we have any precedent for using name-prefix on structs, and other rtnetlink sub-specs give full names for fixed header struct members. Fixes: bc515ed06652 ("netlink: specs: Add a spec for neighbor tables in rtnetlink") Reviewed-by: Donald Hunter Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250414211851.602096-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_neigh.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt_neigh.yaml index a1e137a16abd..a843caa72259 100644 --- a/Documentation/netlink/specs/rt_neigh.yaml +++ b/Documentation/netlink/specs/rt_neigh.yaml @@ -13,25 +13,25 @@ definitions: type: struct members: - - name: family + name: ndm-family type: u8 - - name: pad + name: ndm-pad type: pad len: 3 - - name: ifindex + name: ndm-ifindex type: s32 - - name: state + name: ndm-state type: u16 enum: nud-state - - name: flags + name: ndm-flags type: u8 enum: ntf-flags - - name: type + name: ndm-type type: u8 enum: rtm-type - From 36355ddfe8955f226a88a543ed354b9f6b84cd70 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 14 Apr 2025 22:04:34 +0200 Subject: [PATCH 65/81] net: b53: enable BPDU reception for management port For STP to work, receiving BPDUs is essential, but the appropriate bit was never set. Without GC_RX_BPDU_EN, the switch chip will filter all BPDUs, even if an appropriate PVID VLAN was setup. Fixes: ff39c2d68679 ("net: dsa: b53: Add bridge support") Signed-off-by: Jonas Gorski Link: https://patch.msgid.link/20250414200434.194422-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 61d164ffb3ae..e5ba71897906 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -737,6 +737,15 @@ static void b53_enable_mib(struct b53_device *dev) b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); } +static void b53_enable_stp(struct b53_device *dev) +{ + u8 gc; + + b53_read8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, &gc); + gc |= GC_RX_BPDU_EN; + b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, gc); +} + static u16 b53_default_pvid(struct b53_device *dev) { if (is5325(dev) || is5365(dev)) @@ -876,6 +885,7 @@ static int b53_switch_reset(struct b53_device *dev) } b53_enable_mib(dev); + b53_enable_stp(dev); return b53_flush_arl(dev, FAST_AGE_STATIC); } From eb25de13bd9cf025413a04f25e715d0e99847e30 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 14 Apr 2025 22:00:20 +0200 Subject: [PATCH 66/81] net: bridge: switchdev: do not notify new brentries as changed When adding a bridge vlan that is pvid or untagged after the vlan has already been added to any other switchdev backed port, the vlan change will be propagated as changed, since the flags change. This causes the vlan to not be added to the hardware for DSA switches, since the DSA handler ignores any vlans for the CPU or DSA ports that are changed. E.g. the following order of operations would work: $ ip link add swbridge type bridge vlan_filtering 1 vlan_default_pvid 0 $ ip link set lan1 master swbridge $ bridge vlan add dev swbridge vid 1 pvid untagged self $ bridge vlan add dev lan1 vid 1 pvid untagged but this order would break: $ ip link add swbridge type bridge vlan_filtering 1 vlan_default_pvid 0 $ ip link set lan1 master swbridge $ bridge vlan add dev lan1 vid 1 pvid untagged $ bridge vlan add dev swbridge vid 1 pvid untagged self Additionally, the vlan on the bridge itself would become undeletable: $ bridge vlan port vlan-id lan1 1 PVID Egress Untagged swbridge 1 PVID Egress Untagged $ bridge vlan del dev swbridge vid 1 self $ bridge vlan port vlan-id lan1 1 PVID Egress Untagged swbridge 1 Egress Untagged since the vlan was never added to DSA's vlan list, so deleting it will cause an error, causing the bridge code to not remove it. Fix this by checking if flags changed only for vlans that are already brentry and pass changed as false for those that become brentries, as these are a new vlan (member) from the switchdev point of view. Since *changed is set to true for becomes_brentry = true regardless of would_change's value, this will not change any rtnetlink notification delivery, just the value passed on to switchdev in vlan->changed. Fixes: 8d23a54f5bee ("net: bridge: switchdev: differentiate new VLANs from changed ones") Reviewed-by: Vladimir Oltean Signed-off-by: Jonas Gorski Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250414200020.192715-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- net/bridge/br_vlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index d9a69ec9affe..939a3aa78d5c 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -715,8 +715,8 @@ static int br_vlan_add_existing(struct net_bridge *br, u16 flags, bool *changed, struct netlink_ext_ack *extack) { - bool would_change = __vlan_flags_would_change(vlan, flags); bool becomes_brentry = false; + bool would_change = false; int err; if (!br_vlan_is_brentry(vlan)) { @@ -725,6 +725,8 @@ static int br_vlan_add_existing(struct net_bridge *br, return -EINVAL; becomes_brentry = true; + } else { + would_change = __vlan_flags_would_change(vlan, flags); } /* Master VLANs that aren't brentries weren't notified before, From b2727326d0a53709380aa147018085d71a6d4843 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 15 Apr 2025 08:59:09 +0530 Subject: [PATCH 67/81] net: txgbe: fix memory leak in txgbe_probe() error path When txgbe_sw_init() is called, memory is allocated for wx->rss_key in wx_init_rss_key(). However, in txgbe_probe() function, the subsequent error paths after txgbe_sw_init() don't free the rss_key. Fix that by freeing it in error path along with wx->mac_table. Also change the label to which execution jumps when txgbe_sw_init() fails, because otherwise, it could lead to a double free for rss_key, when the mac_table allocation fails in wx_sw_init(). Fixes: 937d46ecc5f9 ("net: wangxun: add ethtool_ops for channel number") Reported-by: Jiawen Wu Signed-off-by: Abdun Nihaal Reviewed-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250415032910.13139-1-abdun.nihaal@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index a2e245e3b016..38206a46693b 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -611,7 +611,7 @@ static int txgbe_probe(struct pci_dev *pdev, /* setup the private structure */ err = txgbe_sw_init(wx); if (err) - goto err_free_mac_table; + goto err_pci_release_regions; /* check if flash load is done after hw power up */ err = wx_check_flash_load(wx, TXGBE_SPI_ILDR_STATUS_PERST); @@ -769,6 +769,7 @@ static int txgbe_probe(struct pci_dev *pdev, wx_clear_interrupt_scheme(wx); wx_control_hw(wx, false); err_free_mac_table: + kfree(wx->rss_key); kfree(wx->mac_table); err_pci_release_regions: pci_release_selected_regions(pdev, From c84f6ce918a9e6f4996597cbc62536bbf2247c96 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:28:50 +0300 Subject: [PATCH 68/81] net: dsa: mv88e6xxx: avoid unregistering devlink regions which were never registered Russell King reports that a system with mv88e6xxx dereferences a NULL pointer when unbinding this driver: https://lore.kernel.org/netdev/Z_lRkMlTJ1KQ0kVX@shell.armlinux.org.uk/ The crash seems to be in devlink_region_destroy(), which is not NULL tolerant but is given a NULL devlink global region pointer. At least on some chips, some devlink regions are conditionally registered since the blamed commit, see mv88e6xxx_setup_devlink_regions_global(): if (cond && !cond(chip)) continue; These are MV88E6XXX_REGION_STU and MV88E6XXX_REGION_PVT. If the chip does not have an STU or PVT, it should crash like this. To fix the issue, avoid unregistering those regions which are NULL, i.e. were skipped at mv88e6xxx_setup_devlink_regions_global() time. Fixes: 836021a2d0e0 ("net: dsa: mv88e6xxx: Export cross-chip PVT as devlink region") Tested-by: Russell King (Oracle) Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212850.2953957-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/devlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/devlink.c b/drivers/net/dsa/mv88e6xxx/devlink.c index 795c8df7b6a7..195460a0a0d4 100644 --- a/drivers/net/dsa/mv88e6xxx/devlink.c +++ b/drivers/net/dsa/mv88e6xxx/devlink.c @@ -736,7 +736,8 @@ void mv88e6xxx_teardown_devlink_regions_global(struct dsa_switch *ds) int i; for (i = 0; i < ARRAY_SIZE(mv88e6xxx_regions); i++) - dsa_devlink_region_destroy(chip->regions[i]); + if (chip->regions[i]) + dsa_devlink_region_destroy(chip->regions[i]); } void mv88e6xxx_teardown_devlink_regions_port(struct dsa_switch *ds, int port) From ea08dfc35f83cfc73493c52f63ae4f2e29edfe8d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:29:13 +0300 Subject: [PATCH 69/81] net: dsa: mv88e6xxx: fix -ENOENT when deleting VLANs and MST is unsupported Russell King reports that on the ZII dev rev B, deleting a bridge VLAN from a user port fails with -ENOENT: https://lore.kernel.org/netdev/Z_lQXNP0s5-IiJzd@shell.armlinux.org.uk/ This comes from mv88e6xxx_port_vlan_leave() -> mv88e6xxx_mst_put(), which tries to find an MST entry in &chip->msts associated with the SID, but fails and returns -ENOENT as such. But we know that this chip does not support MST at all, so that is not surprising. The question is why does the guard in mv88e6xxx_mst_put() not exit early: if (!sid) return 0; And the answer seems to be simple: the sid comes from vlan.sid which supposedly was previously populated by mv88e6xxx_vtu_get(). But some chip->info->ops->vtu_getnext() implementations do not populate vlan.sid, for example see mv88e6185_g1_vtu_getnext(). In that case, later in mv88e6xxx_port_vlan_leave() we are using a garbage sid which is just residual stack memory. Testing for sid == 0 covers all cases of a non-bridge VLAN or a bridge VLAN mapped to the default MSTI. For some chips, SID 0 is valid and installed by mv88e6xxx_stu_setup(). A chip which does not support the STU would implicitly only support mapping all VLANs to the default MSTI, so although SID 0 is not valid, it would be sufficient, if we were to zero-initialize the vlan structure, to fix the bug, due to the coincidence that a test for vlan.sid == 0 already exists and leads to the same (correct) behavior. Another option which would be sufficient would be to add a test for mv88e6xxx_has_stu() inside mv88e6xxx_mst_put(), symmetric to the one which already exists in mv88e6xxx_mst_get(). But that placement means the caller will have to dereference vlan.sid, which means it will access uninitialized memory, which is not nice even if it ignores it later. So we end up making both modifications, in order to not rely just on the sid == 0 coincidence, but also to avoid having uninitialized structure fields which might get temporarily accessed. Fixes: acaf4d2e36b3 ("net: dsa: mv88e6xxx: MST Offloading") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212913.2955253-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 29a89ab4b789..08db846cda8d 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1852,6 +1852,8 @@ static int mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, if (!chip->info->ops->vtu_getnext) return -EOPNOTSUPP; + memset(entry, 0, sizeof(*entry)); + entry->vid = vid ? vid - 1 : mv88e6xxx_max_vid(chip); entry->valid = false; @@ -1960,7 +1962,16 @@ static int mv88e6xxx_mst_put(struct mv88e6xxx_chip *chip, u8 sid) struct mv88e6xxx_mst *mst, *tmp; int err; - if (!sid) + /* If the SID is zero, it is for a VLAN mapped to the default MSTI, + * and mv88e6xxx_stu_setup() made sure it is always present, and thus, + * should not be removed here. + * + * If the chip lacks STU support, numerically the "sid" variable will + * happen to also be zero, but we don't want to rely on that fact, so + * we explicitly test that first. In that case, there is also nothing + * to do here. + */ + if (!mv88e6xxx_has_stu(chip) || !sid) return 0; list_for_each_entry_safe(mst, tmp, &chip->msts, node) { From 7afb5fb42d4950f33af2732b8147c552659f79b7 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:29:30 +0300 Subject: [PATCH 70/81] net: dsa: clean up FDB, MDB, VLAN entries on unbind As explained in many places such as commit b117e1e8a86d ("net: dsa: delete dsa_legacy_fdb_add and dsa_legacy_fdb_del"), DSA is written given the assumption that higher layers have balanced additions/deletions. As such, it only makes sense to be extremely vocal when those assumptions are violated and the driver unbinds with entries still present. But Ido Schimmel points out a very simple situation where that is wrong: https://lore.kernel.org/netdev/ZDazSM5UsPPjQuKr@shredder/ (also briefly discussed by me in the aforementioned commit). Basically, while the bridge bypass operations are not something that DSA explicitly documents, and for the majority of DSA drivers this API simply causes them to go to promiscuous mode, that isn't the case for all drivers. Some have the necessary requirements for bridge bypass operations to do something useful - see dsa_switch_supports_uc_filtering(). Although in tools/testing/selftests/net/forwarding/local_termination.sh, we made an effort to popularize better mechanisms to manage address filters on DSA interfaces from user space - namely macvlan for unicast, and setsockopt(IP_ADD_MEMBERSHIP) - through mtools - for multicast, the fact is that 'bridge fdb add ... self static local' also exists as kernel UAPI, and might be useful to someone, even if only for a quick hack. It seems counter-productive to block that path by implementing shim .ndo_fdb_add and .ndo_fdb_del operations which just return -EOPNOTSUPP in order to prevent the ndo_dflt_fdb_add() and ndo_dflt_fdb_del() from running, although we could do that. Accepting that cleanup is necessary seems to be the only option. Especially since we appear to be coming back at this from a different angle as well. Russell King is noticing that the WARN_ON() triggers even for VLANs: https://lore.kernel.org/netdev/Z_li8Bj8bD4-BYKQ@shell.armlinux.org.uk/ What happens in the bug report above is that dsa_port_do_vlan_del() fails, then the VLAN entry lingers on, and then we warn on unbind and leak it. This is not a straight revert of the blamed commit, but we now add an informational print to the kernel log (to still have a way to see that bugs exist), and some extra comments gathered from past years' experience, to justify the logic. Fixes: 0832cd9f1f02 ("net: dsa: warn if port lists aren't empty in dsa_port_teardown") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414212930.2956310-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e827775baf2e..e7e32956070a 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1478,12 +1478,44 @@ static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) static void dsa_switch_release_ports(struct dsa_switch *ds) { + struct dsa_mac_addr *a, *tmp; struct dsa_port *dp, *next; + struct dsa_vlan *v, *n; dsa_switch_for_each_port_safe(dp, next, ds) { - WARN_ON(!list_empty(&dp->fdbs)); - WARN_ON(!list_empty(&dp->mdbs)); - WARN_ON(!list_empty(&dp->vlans)); + /* These are either entries that upper layers lost track of + * (probably due to bugs), or installed through interfaces + * where one does not necessarily have to remove them, like + * ndo_dflt_fdb_add(). + */ + list_for_each_entry_safe(a, tmp, &dp->fdbs, list) { + dev_info(ds->dev, + "Cleaning up unicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + list_for_each_entry_safe(a, tmp, &dp->mdbs, list) { + dev_info(ds->dev, + "Cleaning up multicast address %pM vid %u from port %d\n", + a->addr, a->vid, dp->index); + list_del(&a->list); + kfree(a); + } + + /* These are entries that upper layers have lost track of, + * probably due to bugs, but also due to dsa_port_do_vlan_del() + * having failed and the VLAN entry still lingering on. + */ + list_for_each_entry_safe(v, n, &dp->vlans, list) { + dev_info(ds->dev, + "Cleaning up vid %u from port %d\n", + v->vid, dp->index); + list_del(&v->list); + kfree(v); + } + list_del(&dp->list); kfree(dp); } From 8bf108d7161ffc6880ad13a0cc109de3cf631727 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:30:01 +0300 Subject: [PATCH 71/81] net: dsa: free routing table on probe failure If complete = true in dsa_tree_setup(), it means that we are the last switch of the tree which is successfully probing, and we should be setting up all switches from our probe path. After "complete" becomes true, dsa_tree_setup_cpu_ports() or any subsequent function may fail. If that happens, the entire tree setup is in limbo: the first N-1 switches have successfully finished probing (doing nothing but having allocated persistent memory in the tree's dst->ports, and maybe dst->rtable), and switch N failed to probe, ending the tree setup process before anything is tangible from the user's PoV. If switch N fails to probe, its memory (ports) will be freed and removed from dst->ports. However, the dst->rtable elements pointing to its ports, as created by dsa_link_touch(), will remain there, and will lead to use-after-free if dereferenced. If dsa_tree_setup_switches() returns -EPROBE_DEFER, which is entirely possible because that is where ds->ops->setup() is, we get a kasan report like this: ================================================================== BUG: KASAN: slab-use-after-free in mv88e6xxx_setup_upstream_port+0x240/0x568 Read of size 8 at addr ffff000004f56020 by task kworker/u8:3/42 Call trace: __asan_report_load8_noabort+0x20/0x30 mv88e6xxx_setup_upstream_port+0x240/0x568 mv88e6xxx_setup+0xebc/0x1eb0 dsa_register_switch+0x1af4/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 Allocated by task 42: __kasan_kmalloc+0x84/0xa0 __kmalloc_cache_noprof+0x298/0x490 dsa_switch_touch_ports+0x174/0x3d8 dsa_register_switch+0x800/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 Freed by task 42: __kasan_slab_free+0x48/0x68 kfree+0x138/0x418 dsa_register_switch+0x2694/0x2ae0 mv88e6xxx_register_switch+0x1b8/0x2a8 mv88e6xxx_probe+0xc4c/0xf60 mdio_probe+0x78/0xb8 really_probe+0x2b8/0x5a8 __driver_probe_device+0x164/0x298 driver_probe_device+0x78/0x258 __device_attach_driver+0x274/0x350 The simplest way to fix the bug is to delete the routing table in its entirety. dsa_tree_setup_routing_table() has no problem in regenerating it even if we deleted links between ports other than those of switch N, because dsa_link_touch() first checks whether the port pair already exists in dst->rtable, allocating if not. The deletion of the routing table in its entirety already exists in dsa_tree_teardown(), so refactor that into a function that can also be called from the tree setup error path. In my analysis of the commit to blame, it is the one which added dsa_link elements to dst->rtable. Prior to that, each switch had its own ds->rtable which is freed when the switch fails to probe. But the tree is potentially persistent memory. Fixes: c5f51765a1f6 ("net: dsa: list DSA links in the fabric") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414213001.2957964-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/dsa.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index e7e32956070a..436a7e1b412a 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -862,6 +862,16 @@ static void dsa_tree_teardown_lags(struct dsa_switch_tree *dst) kfree(dst->lags); } +static void dsa_tree_teardown_routing_table(struct dsa_switch_tree *dst) +{ + struct dsa_link *dl, *next; + + list_for_each_entry_safe(dl, next, &dst->rtable, list) { + list_del(&dl->list); + kfree(dl); + } +} + static int dsa_tree_setup(struct dsa_switch_tree *dst) { bool complete; @@ -879,7 +889,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) err = dsa_tree_setup_cpu_ports(dst); if (err) - return err; + goto teardown_rtable; err = dsa_tree_setup_switches(dst); if (err) @@ -911,14 +921,14 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) dsa_tree_teardown_switches(dst); teardown_cpu_ports: dsa_tree_teardown_cpu_ports(dst); +teardown_rtable: + dsa_tree_teardown_routing_table(dst); return err; } static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - struct dsa_link *dl, *next; - if (!dst->setup) return; @@ -932,10 +942,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_cpu_ports(dst); - list_for_each_entry_safe(dl, next, &dst->rtable, list) { - list_del(&dl->list); - kfree(dl); - } + dsa_tree_teardown_routing_table(dst); pr_info("DSA: tree %d torn down\n", dst->index); From 514eff7b0aa1c5eb645ddbb8676ef3e2d88a8b99 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 15 Apr 2025 00:30:20 +0300 Subject: [PATCH 72/81] net: dsa: avoid refcount warnings when ds->ops->tag_8021q_vlan_del() fails This is very similar to the problem and solution from commit 232deb3f9567 ("net: dsa: avoid refcount warnings when ->port_{fdb,mdb}_del returns error"), except for the dsa_port_do_tag_8021q_vlan_del() operation. Fixes: c64b9c05045a ("net: dsa: tag_8021q: add proper cross-chip notifier support") Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250414213020.2959021-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_8021q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 3ee53e28ec2e..53e03fd8071b 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -197,7 +197,7 @@ static int dsa_port_do_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid) err = ds->ops->tag_8021q_vlan_del(ds, port, vid); if (err) { - refcount_inc(&v->refcount); + refcount_set(&v->refcount, 1); return err; } From 2a5970d5aaff8f3e33ce3bfaa403ae88c40de40d Mon Sep 17 00:00:00 2001 From: Sagi Maimon Date: Tue, 15 Apr 2025 08:31:31 +0300 Subject: [PATCH 73/81] ptp: ocp: fix start time alignment in ptp_ocp_signal_set In ptp_ocp_signal_set, the start time for periodic signals is not aligned to the next period boundary. The current code rounds up the start time and divides by the period but fails to multiply back by the period, causing misaligned signal starts. Fix this by multiplying the rounded-up value by the period to ensure the start time is the closest next period. Fixes: 4bd46bb037f8e ("ptp: ocp: Use DIV64_U64_ROUND_UP for rounding.") Signed-off-by: Sagi Maimon Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20250415053131.129413-1-maimon.sagi@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 7945c6be1f7c..faf6e027f89a 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2067,6 +2067,7 @@ ptp_ocp_signal_set(struct ptp_ocp *bp, int gen, struct ptp_ocp_signal *s) if (!s->start) { /* roundup() does not work on 32-bit systems */ s->start = DIV64_U64_ROUND_UP(start_ns, s->period); + s->start *= s->period; s->start = ktime_add(s->start, s->phase); } From 4798cfa2097f0833d54d8f5ce20ef14631917839 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 15 Apr 2025 08:15:52 -0700 Subject: [PATCH 74/81] net: don't try to ops lock uninitialized devs We need to be careful when operating on dev while in rtnl_create_link(). Some devices (vxlan) initialize netdev_ops in ->newlink, so later on. Avoid using netdev_lock_ops(), the device isn't registered so we cannot legally call its ops or generate any notifications for it. netdev_ops_assert_locked_or_invisible() is safe to use, it checks registration status first. Reported-by: syzbot+de1c7d68a10e3f123bdd@syzkaller.appspotmail.com Fixes: 04efcee6ef8d ("net: hold instance lock during NETDEV_CHANGE") Acked-by: Stanislav Fomichev Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250415151552.768373-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 ++ net/core/rtnetlink.c | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5fcbc66d865e..1be7cb73a602 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1520,6 +1520,8 @@ EXPORT_SYMBOL(netdev_features_change); void netif_state_change(struct net_device *dev) { + netdev_ops_assert_locked_or_invisible(dev); + if (dev->flags & IFF_UP) { struct netdev_notifier_change_info change_info = { .info.dev = dev, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 39a5b72e861f..c5a7f41982a5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3676,11 +3676,8 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, nla_len(tb[IFLA_BROADCAST])); if (tb[IFLA_TXQLEN]) dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); - if (tb[IFLA_OPERSTATE]) { - netdev_lock_ops(dev); + if (tb[IFLA_OPERSTATE]) set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); - netdev_unlock_ops(dev); - } if (tb[IFLA_LINKMODE]) dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); if (tb[IFLA_GROUP]) From d2d31ea8cd80b9830cdab624e94f9d41178fc99d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 15 Apr 2025 15:53:48 +0200 Subject: [PATCH 75/81] netfilter: conntrack: fix erronous removal of offload bit The blamed commit exposes a possible issue with flow_offload_teardown(): We might remove the offload bit of a conntrack entry that has been offloaded again. 1. conntrack entry c1 is offloaded via flow f1 (f1->ct == c1). 2. f1 times out and is pushed back to slowpath, c1 offload bit is removed. Due to bug, f1 is not unlinked from rhashtable right away. 3. a new packet arrives for the flow and re-offload is triggered, i.e. f2->ct == c1. This is because lookup in flowtable skip entries with teardown bit set. 4. Next flowtable gc cycle finds f1 again 5. flow_offload_teardown() is called again for f1 and c1 offload bit is removed again, even though we have f2 referencing the same entry. This is harmless, but clearly not correct. Fix the bug that exposes this: set 'teardown = true' to have the gc callback unlink the flowtable entry from the table right away instead of the unintentional defer to the next round. Also prevent flow_offload_teardown() from fixing up the ct state more than once: We could also be called from the data path or a notifier, not only from the flowtable gc callback. NF_FLOW_TEARDOWN can never be unset, so we can use it as synchronization point: if we observe did not see a 0 -> 1 transition, then another CPU is already doing the ct state fixups for us. Fixes: 03428ca5cee9 ("netfilter: conntrack: rework offload nf_conn timeout extension logic") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 9d8361526f82..9441ac3d8c1a 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -383,8 +383,8 @@ static void flow_offload_del(struct nf_flowtable *flow_table, void flow_offload_teardown(struct flow_offload *flow) { clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); - set_bit(NF_FLOW_TEARDOWN, &flow->flags); - flow_offload_fixup_ct(flow); + if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags)) + flow_offload_fixup_ct(flow); } EXPORT_SYMBOL_GPL(flow_offload_teardown); @@ -558,10 +558,12 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - nf_flow_custom_gc(flow_table, flow)) + nf_flow_custom_gc(flow_table, flow)) { flow_offload_teardown(flow); - else if (!teardown) + teardown = true; + } else if (!teardown) { nf_flow_table_extend_ct_timeout(flow->ct); + } if (teardown) { if (test_bit(NF_FLOW_HW, &flow->flags)) { From 75bc744466444ef417b5f709f72b91c83301bcd1 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:41 +0530 Subject: [PATCH 76/81] net: ti: icssg-prueth: Fix kernel warning while bringing down network interface During network interface initialization, the NIC driver needs to register its Rx queue with the XDP, to ensure the incoming XDP buffer carries a pointer reference to this info and is stored inside xdp_rxq_info. While this struct isn't tied to XDP prog, if there are any changes in Rx queue, the NIC driver needs to stop the Rx queue by unregistering with XDP before purging and reallocating memory. Drop page_pool destroy during Rx channel reset as this is already handled by XDP during xdp_rxq_info_unreg (Rx queue unregister), failing to do will cause the following warning: warning logs: https://gist.github.com/MeghanaMalladiTI/eb627e5dc8de24e42d7d46572c13e576 Fixes: 46eeb90f03e0 ("net: ti: icssg-prueth: Use page_pool API for RX buffer allocation") Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-2-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_common.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index 14002b026452..ec643fb69d30 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -1215,9 +1215,6 @@ void prueth_reset_rx_chan(struct prueth_rx_chn *chn, prueth_rx_cleanup); if (disable) k3_udma_glue_disable_rx_chn(chn->rx_chn); - - page_pool_destroy(chn->pg_pool); - chn->pg_pool = NULL; } EXPORT_SYMBOL_GPL(prueth_reset_rx_chan); From 8ed2fa661350f0b49edb765d18173b5c766c3686 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:42 +0530 Subject: [PATCH 77/81] net: ti: icssg-prueth: Fix possible NULL pointer dereference inside emac_xmit_xdp_frame() There is an error check inside emac_xmit_xdp_frame() function which is called when the driver wants to transmit XDP frame, to check if the allocated tx descriptor is NULL, if true to exit and return ICSSG_XDP_CONSUMED implying failure in transmission. In this case trying to free a descriptor which is NULL will result in kernel crash due to NULL pointer dereference. Fix this error handling and increase netdev tx_dropped stats in the caller of this function if the function returns ICSSG_XDP_CONSUMED. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/70d8dd76-0c76-42fc-8611-9884937c82f5@stanley.mountain/ Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Reviewed-by: Roger Quadros Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-3-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icssg_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index ec643fb69d30..b4be76e13a2f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -583,7 +583,7 @@ u32 emac_xmit_xdp_frame(struct prueth_emac *emac, first_desc = k3_cppi_desc_pool_alloc(tx_chn->desc_pool); if (!first_desc) { netdev_dbg(ndev, "xdp tx: failed to allocate descriptor\n"); - goto drop_free_descs; /* drop */ + return ICSSG_XDP_CONSUMED; /* drop */ } if (page) { /* already DMA mapped by page_pool */ @@ -671,8 +671,10 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, q_idx = smp_processor_id() % emac->tx_ch_num; result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); - if (result == ICSSG_XDP_CONSUMED) + if (result == ICSSG_XDP_CONSUMED) { + ndev->stats.tx_dropped++; goto drop; + } dev_sw_netstats_rx_add(ndev, xdpf->len); return result; From 7349c9e9979333abfce42da5f9025598083b59c9 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 15 Apr 2025 14:35:43 +0530 Subject: [PATCH 78/81] net: ti: icss-iep: Fix possible NULL pointer dereference for perout request The ICSS IEP driver tracks perout and pps enable state with flags. Currently when disabling pps and perout signals during icss_iep_exit(), results in NULL pointer dereference for perout. To fix the null pointer dereference issue, the icss_iep_perout_enable_hw function can be modified to directly clear the IEP CMP registers when disabling PPS or PEROUT, without referencing the ptp_perout_request structure, as its contents are irrelevant in this case. Fixes: 9b115361248d ("net: ti: icssg-prueth: Fix clearing of IEP_CMP_CFG registers during iep_init") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/7b1c7c36-363a-4085-b26c-4f210bee1df6@stanley.mountain/ Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250415090543.717991-4-m-malladi@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/icssg/icss_iep.c | 121 +++++++++++------------ 1 file changed, 58 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index b4a34c57b7b4..2a1c43316f46 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -412,6 +412,22 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, int ret; u64 cmp; + if (!on) { + /* Disable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), 0); + + /* clear CMP regs */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); + + /* Disable sync */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); + + return 0; + } + /* Calculate width of the signal for PPS/PEROUT handling */ ts.tv_sec = req->on.sec; ts.tv_nsec = req->on.nsec; @@ -430,64 +446,39 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, if (ret) return ret; - if (on) { - /* Configure CMP */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); - /* Configure SYNC, based on req on width */ - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - } + /* Configure CMP */ + regmap_write(iep->map, ICSS_IEP_CMP1_REG0, lower_32_bits(cmp)); + if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) + regmap_write(iep->map, ICSS_IEP_CMP1_REG1, upper_32_bits(cmp)); + /* Configure SYNC, based on req on width */ + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC0_PERIOD_REG, 0); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); /* one-shot mode */ + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } else { - if (on) { - u64 start_ns; - - iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + - req->period.nsec; - start_ns = ((u64)req->period.sec * NSEC_PER_SEC) - + req->period.nsec; - icss_iep_update_to_next_boundary(iep, start_ns); - - regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, - div_u64(ns_width, iep->def_inc)); - regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, - div_u64(ns_start, iep->def_inc)); - /* Enable Sync in single shot mode */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, - IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); - /* Enable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); - } else { - /* Disable CMP 1 */ - regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, - IEP_CMP_CFG_CMP_EN(1), 0); - - /* clear CMP regs */ - regmap_write(iep->map, ICSS_IEP_CMP1_REG0, 0); - if (iep->plat_data->flags & ICSS_IEP_64BIT_COUNTER_SUPPORT) - regmap_write(iep->map, ICSS_IEP_CMP1_REG1, 0); - - /* Disable sync */ - regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, 0); - } + u64 start_ns; + + iep->period = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + start_ns = ((u64)req->period.sec * NSEC_PER_SEC) + + req->period.nsec; + icss_iep_update_to_next_boundary(iep, start_ns); + + regmap_write(iep->map, ICSS_IEP_SYNC_PWIDTH_REG, + div_u64(ns_width, iep->def_inc)); + regmap_write(iep->map, ICSS_IEP_SYNC_START_REG, + div_u64(ns_start, iep->def_inc)); + /* Enable Sync in single shot mode */ + regmap_write(iep->map, ICSS_IEP_SYNC_CTRL_REG, + IEP_SYNC_CTRL_SYNC_N_EN(0) | IEP_SYNC_CTRL_SYNC_EN); + /* Enable CMP 1 */ + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(1), IEP_CMP_CFG_CMP_EN(1)); } return 0; @@ -498,11 +489,21 @@ static int icss_iep_perout_enable(struct icss_iep *iep, { int ret = 0; + if (!on) + goto disable; + /* Reject requests with unsupported flags */ if (req->flags & ~(PTP_PEROUT_DUTY_CYCLE | PTP_PEROUT_PHASE)) return -EOPNOTSUPP; + /* Set default "on" time (1ms) for the signal if not passed by the app */ + if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { + req->on.sec = 0; + req->on.nsec = NSEC_PER_MSEC; + } + +disable: mutex_lock(&iep->ptp_clk_mutex); if (iep->pps_enabled) { @@ -513,12 +514,6 @@ static int icss_iep_perout_enable(struct icss_iep *iep, if (iep->perout_enabled == !!on) goto exit; - /* Set default "on" time (1ms) for the signal if not passed by the app */ - if (!(req->flags & PTP_PEROUT_DUTY_CYCLE)) { - req->on.sec = 0; - req->on.nsec = NSEC_PER_MSEC; - } - ret = icss_iep_perout_enable_hw(iep, req, on); if (!ret) iep->perout_enabled = !!on; From 6bc2b6c6f16d8e60de518d26da1bc6bc436cf71d Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:50:46 +0100 Subject: [PATCH 79/81] net: ethernet: mtk_eth_soc: reapply mdc divider on reset In the current method, the MDC divider was reset to the default setting of 2.5MHz after the NETSYS SER. Therefore, we need to reapply the MDC divider configuration function in mtk_hw_init() after reset. Fixes: c0a440031d431 ("net: ethernet: mtk_eth_soc: set MDIO bus clock frequency") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/8ab7381447e6cdcb317d5b5a6ddd90a1734efcb0.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 39 +++++++++++++-------- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 43197b28b3e7..1a235283b0e9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -871,9 +871,25 @@ static const struct phylink_mac_ops mtk_phylink_ops = { .mac_enable_tx_lpi = mtk_mac_enable_tx_lpi, }; +static void mtk_mdio_config(struct mtk_eth *eth) +{ + u32 val; + + /* Configure MDC Divider */ + val = FIELD_PREP(PPSC_MDC_CFG, eth->mdc_divider); + + /* Configure MDC Turbo Mode */ + if (mtk_is_netsys_v3_or_greater(eth)) + mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); + else + val |= PPSC_MDC_TURBO; + + mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); +} + static int mtk_mdio_init(struct mtk_eth *eth) { - unsigned int max_clk = 2500000, divider; + unsigned int max_clk = 2500000; struct device_node *mii_np; int ret; u32 val; @@ -908,20 +924,9 @@ static int mtk_mdio_init(struct mtk_eth *eth) } max_clk = val; } - divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); - - /* Configure MDC Turbo Mode */ - if (mtk_is_netsys_v3_or_greater(eth)) - mtk_m32(eth, 0, MISC_MDC_TURBO, MTK_MAC_MISC_V3); - - /* Configure MDC Divider */ - val = FIELD_PREP(PPSC_MDC_CFG, divider); - if (!mtk_is_netsys_v3_or_greater(eth)) - val |= PPSC_MDC_TURBO; - mtk_m32(eth, PPSC_MDC_CFG, val, MTK_PPSC); - - dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / divider); - + eth->mdc_divider = min_t(unsigned int, DIV_ROUND_UP(MDC_MAX_FREQ, max_clk), 63); + mtk_mdio_config(eth); + dev_dbg(eth->dev, "MDC is running on %d Hz\n", MDC_MAX_FREQ / eth->mdc_divider); ret = of_mdiobus_register(eth->mii_bus, mii_np); err_put_node: @@ -3974,6 +3979,10 @@ static int mtk_hw_init(struct mtk_eth *eth, bool reset) else mtk_hw_reset(eth); + /* No MT7628/88 support yet */ + if (reset && !MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) + mtk_mdio_config(eth); + if (mtk_is_netsys_v3_or_greater(eth)) { /* Set FE to PDMAv2 if necessary */ val = mtk_r32(eth, MTK_FE_GLO_MISC); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 90a377ab4359..39709649ea8d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -1271,6 +1271,7 @@ struct mtk_eth { struct clk *clks[MTK_CLK_MAX]; struct mii_bus *mii_bus; + unsigned int mdc_divider; struct work_struct pending_work; unsigned long state; From 6b02eb372c6776c9abb8bc81cf63f96039c24664 Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:51:07 +0100 Subject: [PATCH 80/81] net: ethernet: mtk_eth_soc: correct the max weight of the queue limit for 100Mbps Without this patch, the maximum weight of the queue limit will be incorrect when linked at 100Mbps due to an apparent typo. Fixes: f63959c7eec31 ("net: ethernet: mtk_eth_soc: implement multi-queue support for per-port queues") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/74111ba0bdb13743313999ed467ce564e8189006.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 1a235283b0e9..5a3cfb8908a1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -734,7 +734,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 103) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 3) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: @@ -757,7 +757,7 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, case SPEED_100: val |= MTK_QTX_SCH_MAX_RATE_EN | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5); + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 1); break; case SPEED_1000: From 1b66124135f5f8640bd540fadda4b20cdd23114b Mon Sep 17 00:00:00 2001 From: Bo-Cun Chen Date: Wed, 16 Apr 2025 01:51:25 +0100 Subject: [PATCH 81/81] net: ethernet: mtk_eth_soc: revise QDMA packet scheduler settings The QDMA packet scheduler suffers from a performance issue. Fix this by picking up changes from MediaTek's SDK which change to use Token Bucket instead of Leaky Bucket and fix the SPEED_1000 configuration. Fixes: 160d3a9b1929 ("net: ethernet: mtk_eth_soc: introduce MTK_NETSYS_V2 support") Signed-off-by: Bo-Cun Chen Signed-off-by: Daniel Golle Link: https://patch.msgid.link/18040f60f9e2f5855036b75b28c4332a2d2ebdd8.1744764277.git.daniel@makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5a3cfb8908a1..bdb98c9d8b1c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -762,8 +762,8 @@ static void mtk_set_queue_speed(struct mtk_eth *eth, unsigned int idx, break; case SPEED_1000: val |= MTK_QTX_SCH_MAX_RATE_EN | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 10) | - FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 5) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_MAN, 1) | + FIELD_PREP(MTK_QTX_SCH_MAX_RATE_EXP, 6) | FIELD_PREP(MTK_QTX_SCH_MAX_RATE_WEIGHT, 10); break; default: @@ -3320,7 +3320,7 @@ static int mtk_start_dma(struct mtk_eth *eth) if (mtk_is_netsys_v2_or_greater(eth)) val |= MTK_MUTLI_CNT | MTK_RESV_BUF | MTK_WCOMP_EN | MTK_DMAD_WR_WDONE | - MTK_CHK_DDONE_EN | MTK_LEAKY_BUCKET_EN; + MTK_CHK_DDONE_EN; else val |= MTK_RX_BT_32DWORDS; mtk_w32(eth, val, reg_map->qdma.glo_cfg);