From f26a29a038ee3982099e867bec2260576a19720f Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sat, 16 Nov 2024 21:52:15 +0100 Subject: [PATCH 01/49] net: phy: ensure that genphy_c45_an_config_eee_aneg() sees new value of phydev->eee_cfg.eee_enabled This is a follow-up to 41ffcd95015f ("net: phy: fix phylib's dual eee_enabled") and resolves an issue with genphy_c45_an_config_eee_aneg() (called from genphy_c45_ethtool_set_eee) not seeing the new value of phydev->eee_cfg.eee_enabled. Fixes: 49168d1980e2 ("net: phy: Add phy_support_eee() indicating MAC support EEE") Signed-off-by: Heiner Kallweit Reported-by: Choong Yong Liang Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 4f3e742907cb6..a660a80f34b79 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1672,7 +1672,7 @@ EXPORT_SYMBOL(phy_ethtool_get_eee); * phy_ethtool_set_eee_noneg - Adjusts MAC LPI configuration without PHY * renegotiation * @phydev: pointer to the target PHY device structure - * @data: pointer to the ethtool_keee structure containing the new EEE settings + * @old_cfg: pointer to the eee_config structure containing the old EEE settings * * This function updates the Energy Efficient Ethernet (EEE) configuration * for cases where only the MAC's Low Power Idle (LPI) configuration changes, @@ -1683,11 +1683,10 @@ EXPORT_SYMBOL(phy_ethtool_get_eee); * configuration. */ static void phy_ethtool_set_eee_noneg(struct phy_device *phydev, - struct ethtool_keee *data) + const struct eee_config *old_cfg) { - if (phydev->eee_cfg.tx_lpi_enabled != data->tx_lpi_enabled || - phydev->eee_cfg.tx_lpi_timer != data->tx_lpi_timer) { - eee_to_eeecfg(&phydev->eee_cfg, data); + if (phydev->eee_cfg.tx_lpi_enabled != old_cfg->tx_lpi_enabled || + phydev->eee_cfg.tx_lpi_timer != old_cfg->tx_lpi_timer) { phydev->enable_tx_lpi = eeecfg_mac_can_tx_lpi(&phydev->eee_cfg); if (phydev->link) { phydev->link = false; @@ -1707,18 +1706,23 @@ static void phy_ethtool_set_eee_noneg(struct phy_device *phydev, */ int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data) { + struct eee_config old_cfg; int ret; if (!phydev->drv) return -EIO; mutex_lock(&phydev->lock); + + old_cfg = phydev->eee_cfg; + eee_to_eeecfg(&phydev->eee_cfg, data); + ret = genphy_c45_ethtool_set_eee(phydev, data); - if (ret >= 0) { - if (ret == 0) - phy_ethtool_set_eee_noneg(phydev, data); - eee_to_eeecfg(&phydev->eee_cfg, data); - } + if (ret == 0) + phy_ethtool_set_eee_noneg(phydev, &old_cfg); + else if (ret < 0) + phydev->eee_cfg = old_cfg; + mutex_unlock(&phydev->lock); return ret < 0 ? ret : 0; From 03819abbeb11117dcbba40bfe322b88c0c88a6b6 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sat, 16 Nov 2024 14:05:57 +0100 Subject: [PATCH 02/49] net: usb: lan78xx: Fix double free issue with interrupt buffer allocation In lan78xx_probe(), the buffer `buf` was being freed twice: once implicitly through `usb_free_urb(dev->urb_intr)` with the `URB_FREE_BUFFER` flag and again explicitly by `kfree(buf)`. This caused a double free issue. To resolve this, reordered `kmalloc()` and `usb_alloc_urb()` calls to simplify the initialization sequence and removed the redundant `kfree(buf)`. Now, `buf` is allocated after `usb_alloc_urb()`, ensuring it is correctly managed by `usb_fill_int_urb()` and freed by `usb_free_urb()` as intended. Fixes: a6df95cae40b ("lan78xx: Fix memory allocation bug") Cc: John Efstathiades Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20241116130558.1352230-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 8adf77e3557e7..094a47b8b97eb 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -4414,29 +4414,30 @@ static int lan78xx_probe(struct usb_interface *intf, period = ep_intr->desc.bInterval; maxp = usb_maxpacket(dev->udev, dev->pipe_intr); - buf = kmalloc(maxp, GFP_KERNEL); - if (!buf) { + + dev->urb_intr = usb_alloc_urb(0, GFP_KERNEL); + if (!dev->urb_intr) { ret = -ENOMEM; goto out5; } - dev->urb_intr = usb_alloc_urb(0, GFP_KERNEL); - if (!dev->urb_intr) { + buf = kmalloc(maxp, GFP_KERNEL); + if (!buf) { ret = -ENOMEM; - goto out6; - } else { - usb_fill_int_urb(dev->urb_intr, dev->udev, - dev->pipe_intr, buf, maxp, - intr_complete, dev, period); - dev->urb_intr->transfer_flags |= URB_FREE_BUFFER; + goto free_urbs; } + usb_fill_int_urb(dev->urb_intr, dev->udev, + dev->pipe_intr, buf, maxp, + intr_complete, dev, period); + dev->urb_intr->transfer_flags |= URB_FREE_BUFFER; + dev->maxpacket = usb_maxpacket(dev->udev, dev->pipe_out); /* Reject broken descriptors. */ if (dev->maxpacket == 0) { ret = -ENODEV; - goto out6; + goto free_urbs; } /* driver requires remote-wakeup capability during autosuspend. */ @@ -4444,7 +4445,7 @@ static int lan78xx_probe(struct usb_interface *intf, ret = lan78xx_phy_init(dev); if (ret < 0) - goto out7; + goto free_urbs; ret = register_netdev(netdev); if (ret != 0) { @@ -4466,10 +4467,8 @@ static int lan78xx_probe(struct usb_interface *intf, out8: phy_disconnect(netdev->phydev); -out7: +free_urbs: usb_free_urb(dev->urb_intr); -out6: - kfree(buf); out5: lan78xx_unbind(dev, intf); out4: From ae7370e61c5d8f5bcefc2d4fca724bd4e9bbf789 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Sat, 16 Nov 2024 14:05:58 +0100 Subject: [PATCH 03/49] net: usb: lan78xx: Fix memory leak on device unplug by freeing PHY device Add calls to `phy_device_free` after `fixed_phy_unregister` to fix a memory leak that occurs when the device is unplugged. This ensures proper cleanup of pseudo fixed-link PHYs. Fixes: 89b36fb5e532 ("lan78xx: Lan7801 Support for Fixed PHY") Cc: Raghuram Chary J Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20241116130558.1352230-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 094a47b8b97eb..9f191b6ce8215 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2380,6 +2380,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) if (dev->chipid == ID_REV_CHIP_ID_7801_) { if (phy_is_pseudo_fixed_link(phydev)) { fixed_phy_unregister(phydev); + phy_device_free(phydev); } else { phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0); @@ -4246,8 +4247,10 @@ static void lan78xx_disconnect(struct usb_interface *intf) phy_disconnect(net->phydev); - if (phy_is_pseudo_fixed_link(phydev)) + if (phy_is_pseudo_fixed_link(phydev)) { fixed_phy_unregister(phydev); + phy_device_free(phydev); + } usb_scuttle_anchored_urbs(&dev->deferred); From 078f644cb81b78afdfbc42b9cc2c11959f2ed65c Mon Sep 17 00:00:00 2001 From: David Wei Date: Thu, 21 Nov 2024 22:48:21 -0800 Subject: [PATCH 04/49] selftests: fix nested double quotes in f-string Replace nested double quotes in f-string with outer single quotes. Fixes: 6116075e18f7 ("selftests: nic_link_layer: Add link layer selftest for NIC driver") Signed-off-by: David Wei Link: https://patch.msgid.link/20241122064821.2821199-1-dw@davidwei.uk Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py index db84000fc75b1..79fde603cbbc2 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py @@ -218,5 +218,5 @@ def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]: json_data = process[0] """Check if the field exist in the json data""" if field not in json_data: - raise KsftSkipEx(f"Field {field} does not exist in the output of interface {json_data["ifname"]}") + raise KsftSkipEx(f'Field {field} does not exist in the output of interface {json_data["ifname"]}') return json_data[field] From 9b234a97b10cf1385d451a3824539b774abbcdaf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Nov 2024 19:41:05 +0000 Subject: [PATCH 05/49] rtnetlink: fix rtnl_dump_ifinfo() error path syzbot found that rtnl_dump_ifinfo() could return with a lock held [1] Move code around so that rtnl_link_ops_put() and put_net() can be called at the end of this function. [1] WARNING: lock held when returning to user space! 6.12.0-rc7-syzkaller-01681-g38f83a57aa8e #0 Not tainted syz-executor399/5841 is leaving the kernel with locks still held! 1 lock held by syz-executor399/5841: #0: ffffffff8f46c2a0 (&ops->srcu#2){.+.+}-{0:0}, at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline] #0: ffffffff8f46c2a0 (&ops->srcu#2){.+.+}-{0:0}, at: rcu_read_lock include/linux/rcupdate.h:849 [inline] #0: ffffffff8f46c2a0 (&ops->srcu#2){.+.+}-{0:0}, at: rtnl_link_ops_get+0x22/0x250 net/core/rtnetlink.c:555 Fixes: 43c7ce69d28e ("rtnetlink: Protect struct rtnl_link_ops with SRCU.") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Joe Damato Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241121194105.3632507-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dd142f444659d..58df76fe408a4 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2442,7 +2442,9 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid); if (IS_ERR(tgt_net)) { NL_SET_ERR_MSG(extack, "Invalid target network namespace id"); - return PTR_ERR(tgt_net); + err = PTR_ERR(tgt_net); + netnsid = -1; + goto out; } break; case IFLA_EXT_MASK: @@ -2457,7 +2459,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) default: if (cb->strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request"); - return -EINVAL; + err = -EINVAL; + goto out; } } } @@ -2479,11 +2482,14 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) break; } - if (kind_ops) - rtnl_link_ops_put(kind_ops, ops_srcu_index); cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + +out: + + if (kind_ops) + rtnl_link_ops_put(kind_ops, ops_srcu_index); if (netnsid >= 0) put_net(tgt_net); From 614f4d166eeeb9bd709b0ad29552f691c0f45776 Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Mon, 18 Nov 2024 21:57:41 -0800 Subject: [PATCH 06/49] tg3: Set coherent DMA mask bits to 31 for BCM57766 chipsets The hardware on Broadcom 1G chipsets have a known limitation where they cannot handle DMA addresses that cross over 4GB. When such an address is encountered, the hardware sets the address overflow error bit in the DMA status register and triggers a reset. However, BCM57766 hardware is setting the overflow bit and triggering a reset in some cases when there is no actual underlying address overflow. The hardware team analyzed the issue and concluded that it is happening when the status block update has an address with higher (b16 to b31) bits as 0xffff following a previous update that had lowest bits as 0xffff. To work around this bug in the BCM57766 hardware, set the coherent dma mask from the current 64b to 31b. This will ensure that upper bits of the status block DMA address are always at most 0x7fff, thus avoiding the improper overflow check described above. This work around is intended for only status block and ring memories and has no effect on TX and RX buffers as they do not require coherent memory. Fixes: 72f2afb8a685 ("[TG3]: Add DMA address workaround") Reported-by: Salam Noureddine Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Pavan Chebbi Reviewed-by: Michal Kubiak Link: https://patch.msgid.link/20241119055741.147144-1-pavan.chebbi@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/tg3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 01dfec1159428..9cc8db10a8d60 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17839,6 +17839,9 @@ static int tg3_init_one(struct pci_dev *pdev, } else persist_dma_mask = dma_mask = DMA_BIT_MASK(64); + if (tg3_asic_rev(tp) == ASIC_REV_57766) + persist_dma_mask = DMA_BIT_MASK(31); + /* Configure DMA attributes. */ if (dma_mask > DMA_BIT_MASK(32)) { err = dma_set_mask(&pdev->dev, dma_mask); From e863ff806f72098bccaf8fa89c80d9ad6187c3b0 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 18 Nov 2024 15:03:51 +0100 Subject: [PATCH 07/49] net: usb: lan78xx: Fix refcounting and autosuspend on invalid WoL configuration Validate Wake-on-LAN (WoL) options in `lan78xx_set_wol` before calling `usb_autopm_get_interface`. This prevents USB autopm refcounting issues and ensures the adapter can properly enter autosuspend when invalid WoL options are provided. Fixes: eb9ad088f966 ("lan78xx: Check for supported Wake-on-LAN modes") Signed-off-by: Oleksij Rempel Acked-by: Florian Fainelli Link: https://patch.msgid.link/20241118140351.2398166-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 9f191b6ce8215..531b1b6a37d19 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1652,13 +1652,13 @@ static int lan78xx_set_wol(struct net_device *netdev, struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); int ret; + if (wol->wolopts & ~WAKE_ALL) + return -EINVAL; + ret = usb_autopm_get_interface(dev->intf); if (ret < 0) return ret; - if (wol->wolopts & ~WAKE_ALL) - return -EINVAL; - pdata->wol = wol->wolopts; device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); From f164b296638d1eb1fb1c537e93ab5c8b49966546 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 19 Nov 2024 13:32:02 -0800 Subject: [PATCH 08/49] net: microchip: vcap: Add typegroup table terminators in kunit tests VCAP API unit tests fail randomly with errors such as # vcap_api_iterator_init_test: EXPECTATION FAILED at drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c:387 Expected 134 + 7 == iter.offset, but 134 + 7 == 141 (0x8d) iter.offset == 17214 (0x433e) # vcap_api_iterator_init_test: EXPECTATION FAILED at drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c:388 Expected 5 == iter.reg_idx, but iter.reg_idx == 702 (0x2be) # vcap_api_iterator_init_test: EXPECTATION FAILED at drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c:389 Expected 11 == iter.reg_bitpos, but iter.reg_bitpos == 15 (0xf) # vcap_api_iterator_init_test: pass:0 fail:1 skip:0 total:1 Comments in the code state that "A typegroup table ends with an all-zero terminator". Add the missing terminators. Some of the typegroups did have a terminator of ".offset = 0, .width = 0, .value = 0,". Replace those terminators with "{ }" (no trailing ',') for consistency and to excplicitly state "this is a terminator". Fixes: 67d637516fa9 ("net: microchip: sparx5: Adding KUNIT test for the VCAP API") Cc: Steen Hegelund Signed-off-by: Guenter Roeck Reviewed-by: Daniel Machon Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241119213202.2884639-1-linux@roeck-us.net Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/vcap/vcap_api_kunit.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index 7251121ab196e..16eb3de60eb6d 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -366,12 +366,13 @@ static void vcap_api_iterator_init_test(struct kunit *test) struct vcap_typegroup typegroups[] = { { .offset = 0, .width = 2, .value = 2, }, { .offset = 156, .width = 1, .value = 0, }, - { .offset = 0, .width = 0, .value = 0, }, + { } }; struct vcap_typegroup typegroups2[] = { { .offset = 0, .width = 3, .value = 4, }, { .offset = 49, .width = 2, .value = 0, }, { .offset = 98, .width = 2, .value = 0, }, + { } }; vcap_iter_init(&iter, 52, typegroups, 86); @@ -399,6 +400,7 @@ static void vcap_api_iterator_next_test(struct kunit *test) { .offset = 147, .width = 3, .value = 0, }, { .offset = 196, .width = 2, .value = 0, }, { .offset = 245, .width = 1, .value = 0, }, + { } }; int idx; @@ -433,7 +435,7 @@ static void vcap_api_encode_typegroups_test(struct kunit *test) { .offset = 147, .width = 3, .value = 5, }, { .offset = 196, .width = 2, .value = 2, }, { .offset = 245, .width = 5, .value = 27, }, - { .offset = 0, .width = 0, .value = 0, }, + { } }; vcap_encode_typegroups(stream, 49, typegroups, false); @@ -463,6 +465,7 @@ static void vcap_api_encode_bit_test(struct kunit *test) { .offset = 147, .width = 3, .value = 5, }, { .offset = 196, .width = 2, .value = 2, }, { .offset = 245, .width = 1, .value = 0, }, + { } }; vcap_iter_init(&iter, 49, typegroups, 44); @@ -489,7 +492,7 @@ static void vcap_api_encode_field_test(struct kunit *test) { .offset = 147, .width = 3, .value = 5, }, { .offset = 196, .width = 2, .value = 2, }, { .offset = 245, .width = 5, .value = 27, }, - { .offset = 0, .width = 0, .value = 0, }, + { } }; struct vcap_field rf = { .type = VCAP_FIELD_U32, @@ -538,7 +541,7 @@ static void vcap_api_encode_short_field_test(struct kunit *test) { .offset = 0, .width = 3, .value = 7, }, { .offset = 21, .width = 2, .value = 3, }, { .offset = 42, .width = 1, .value = 1, }, - { .offset = 0, .width = 0, .value = 0, }, + { } }; struct vcap_field rf = { .type = VCAP_FIELD_U32, @@ -608,7 +611,7 @@ static void vcap_api_encode_keyfield_test(struct kunit *test) struct vcap_typegroup tgt[] = { { .offset = 0, .width = 2, .value = 2, }, { .offset = 156, .width = 1, .value = 1, }, - { .offset = 0, .width = 0, .value = 0, }, + { } }; vcap_test_api_init(&admin); @@ -671,7 +674,7 @@ static void vcap_api_encode_max_keyfield_test(struct kunit *test) struct vcap_typegroup tgt[] = { { .offset = 0, .width = 2, .value = 2, }, { .offset = 156, .width = 1, .value = 1, }, - { .offset = 0, .width = 0, .value = 0, }, + { } }; u32 keyres[] = { 0x928e8a84, @@ -732,7 +735,7 @@ static void vcap_api_encode_actionfield_test(struct kunit *test) { .offset = 0, .width = 2, .value = 2, }, { .offset = 21, .width = 1, .value = 1, }, { .offset = 42, .width = 1, .value = 0, }, - { .offset = 0, .width = 0, .value = 0, }, + { } }; vcap_encode_actionfield(&rule, &caf, &rf, tgt); From 3bf39fa849ab8ed52abb6715922e6102d3df9f97 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Nov 2024 14:44:31 -0800 Subject: [PATCH 09/49] netlink: fix false positive warning in extack during dumps Commit under fixes extended extack reporting to dumps. It works under normal conditions, because extack errors are usually reported during ->start() or the first ->dump(), it's quite rare that the dump starts okay but fails later. If the dump does fail later, however, the input skb will already have the initiating message pulled, so checking if bad attr falls within skb->data will fail. Switch the check to using nlh, which is always valid. syzbot found a way to hit that scenario by filling up the receive queue. In this case we initiate a dump but don't call ->dump() until there is read space for an skb. WARNING: CPU: 1 PID: 5845 at net/netlink/af_netlink.c:2210 netlink_ack_tlv_fill+0x1a8/0x560 net/netlink/af_netlink.c:2209 RIP: 0010:netlink_ack_tlv_fill+0x1a8/0x560 net/netlink/af_netlink.c:2209 Call Trace: netlink_dump_done+0x513/0x970 net/netlink/af_netlink.c:2250 netlink_dump+0x91f/0xe10 net/netlink/af_netlink.c:2351 netlink_recvmsg+0x6bb/0x11d0 net/netlink/af_netlink.c:1983 sock_recvmsg_nosec net/socket.c:1051 [inline] sock_recvmsg+0x22f/0x280 net/socket.c:1073 __sys_recvfrom+0x246/0x3d0 net/socket.c:2267 __do_sys_recvfrom net/socket.c:2285 [inline] __se_sys_recvfrom net/socket.c:2281 [inline] __x64_sys_recvfrom+0xde/0x100 net/socket.c:2281 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7ff37dd17a79 Reported-by: syzbot+d4373fa8042c06cefa84@syzkaller.appspotmail.com Fixes: 8af4f60472fc ("netlink: support all extack types in dumps") Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241119224432.1713040-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index dd3517b0fdfd5..f4e7b5e4bb59f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2181,9 +2181,14 @@ netlink_ack_tlv_len(struct netlink_sock *nlk, int err, return tlvlen; } +static bool nlmsg_check_in_payload(const struct nlmsghdr *nlh, const void *addr) +{ + return !WARN_ON(addr < nlmsg_data(nlh) || + addr - (const void *) nlh >= nlh->nlmsg_len); +} + static void -netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb, - const struct nlmsghdr *nlh, int err, +netlink_ack_tlv_fill(struct sk_buff *skb, const struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack) { if (extack->_msg) @@ -2195,9 +2200,7 @@ netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb, if (!err) return; - if (extack->bad_attr && - !WARN_ON((u8 *)extack->bad_attr < in_skb->data || - (u8 *)extack->bad_attr >= in_skb->data + in_skb->len)) + if (extack->bad_attr && nlmsg_check_in_payload(nlh, extack->bad_attr)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, (u8 *)extack->bad_attr - (const u8 *)nlh)); if (extack->policy) @@ -2206,9 +2209,7 @@ netlink_ack_tlv_fill(struct sk_buff *in_skb, struct sk_buff *skb, if (extack->miss_type) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_TYPE, extack->miss_type)); - if (extack->miss_nest && - !WARN_ON((u8 *)extack->miss_nest < in_skb->data || - (u8 *)extack->miss_nest > in_skb->data + in_skb->len)) + if (extack->miss_nest && nlmsg_check_in_payload(nlh, extack->miss_nest)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_MISS_NEST, (u8 *)extack->miss_nest - (const u8 *)nlh)); } @@ -2237,7 +2238,7 @@ static int netlink_dump_done(struct netlink_sock *nlk, struct sk_buff *skb, if (extack_len) { nlh->nlmsg_flags |= NLM_F_ACK_TLVS; if (skb_tailroom(skb) >= extack_len) { - netlink_ack_tlv_fill(cb->skb, skb, cb->nlh, + netlink_ack_tlv_fill(skb, cb->nlh, nlk->dump_done_errno, extack); nlmsg_end(skb, nlh); } @@ -2496,7 +2497,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, } if (tlvlen) - netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack); + netlink_ack_tlv_fill(skb, nlh, err, extack); nlmsg_end(skb, rep); From 9bb88c659673003453fd42e0ddf95c9628409094 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 19 Nov 2024 14:44:32 -0800 Subject: [PATCH 10/49] selftests: net: test extacks in netlink dumps Test that extacks in dumps work. The test fills up the receive buffer to test both the inline dump (as part of sendmsg()) and delayed one (run during recvmsg()). Use YNL helpers to parse the messages. We need to add the test to YNL file to make sure the right include path are used. Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241119224432.1713040-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 3 +- tools/testing/selftests/net/netlink-dumps.c | 129 ++++++++++++++++++++ 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3d487b03c4a09..cb2fc601de66b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -78,7 +78,6 @@ TEST_PROGS += test_vxlan_vnifiltering.sh TEST_GEN_FILES += io_uring_zerocopy_tx TEST_PROGS += io_uring_zerocopy_tx.sh TEST_GEN_FILES += bind_bhash -TEST_GEN_PROGS += netlink-dumps TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_GEN_PROGS += sk_so_peek_off @@ -101,7 +100,7 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh # YNL files, must be before "include ..lib.mk" -YNL_GEN_FILES := busy_poller +YNL_GEN_FILES := busy_poller netlink-dumps TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_FILES := settings diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c index 84e29b7dffb64..07423f256f963 100644 --- a/tools/testing/selftests/net/netlink-dumps.c +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -12,11 +12,140 @@ #include #include +#include +#include #include #include +#include #include "../kselftest_harness.h" +#include + +struct ext_ack { + int err; + + __u32 attr_offs; + __u32 miss_type; + __u32 miss_nest; + const char *str; +}; + +/* 0: no done, 1: done found, 2: extack found, -1: error */ +static int nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +{ + const struct nlmsghdr *nlh; + const struct nlattr *attr; + ssize_t rem; + + for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { + nlh = (struct nlmsghdr *)&buf[n - rem]; + if (!NLMSG_OK(nlh, rem)) + return -1; + + if (nlh->nlmsg_type != NLMSG_DONE) + continue; + + ea->err = -*(int *)NLMSG_DATA(nlh); + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 1; + + ynl_attr_for_each(attr, nlh, sizeof(int)) { + switch (ynl_attr_type(attr)) { + case NLMSGERR_ATTR_OFFS: + ea->attr_offs = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_TYPE: + ea->miss_type = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_NEST: + ea->miss_nest = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MSG: + ea->str = ynl_attr_get_str(attr); + break; + } + } + + return 2; + } + + return 0; +} + +static const struct { + struct nlmsghdr nlhdr; + struct ndmsg ndm; + struct nlattr ahdr; + __u32 val; +} dump_neigh_bad = { + .nlhdr = { + .nlmsg_len = sizeof(dump_neigh_bad), + .nlmsg_type = RTM_GETNEIGH, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .ndm = { + .ndm_family = 123, + }, + .ahdr = { + .nla_len = 4 + 4, + .nla_type = NDA_FLAGS_EXT, + }, + .val = -1, // should fail MASK validation +}; + +TEST(dump_extack) +{ + int netlink_sock; + char buf[8192]; + int one = 1; + int i, cnt; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + ASSERT_GE(netlink_sock, 0); + + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_CAP_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + + /* Dump so many times we fill up the buffer */ + cnt = 64; + for (i = 0; i < cnt; i++) { + n = send(netlink_sock, &dump_neigh_bad, + sizeof(dump_neigh_bad), 0); + ASSERT_EQ(n, sizeof(dump_neigh_bad)); + } + + /* Read out the ENOBUFS */ + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + EXPECT_EQ(n, -1); + EXPECT_EQ(errno, ENOBUFS); + + for (i = 0; i < cnt; i++) { + struct ext_ack ea = {}; + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + if (n < 0) { + ASSERT_GE(i, 10); + break; + } + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + EXPECT_EQ(nl_get_extack(buf, n, &ea), 2); + EXPECT_EQ(ea.attr_offs, + sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); + } +} + static const struct { struct nlmsghdr nlhdr; struct genlmsghdr genlhdr; From 5d066766c5f1252f98ff859265bcd1a5b52ac46c Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 18 Nov 2024 14:04:11 +0000 Subject: [PATCH 11/49] net/l2tp: fix warning in l2tp_exit_net found by syzbot In l2tp's net exit handler, we check that an IDR is empty before destroying it: WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_tunnel_idr)); idr_destroy(&pn->l2tp_tunnel_idr); By forcing memory allocation failures in idr_alloc_32, syzbot is able to provoke a condition where idr_is_empty returns false despite there being no items in the IDR. This turns out to be because the radix tree of the IDR contains only internal radix-tree nodes and it is this that causes idr_is_empty to return false. The internal nodes are cleaned by idr_destroy. Use idr_for_each to check that the IDR is empty instead of idr_is_empty to avoid the problem. Reported-by: syzbot+332fe1e67018625f63c9@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=332fe1e67018625f63c9 Fixes: 73d33bd063c4 ("l2tp: avoid using drain_workqueue in l2tp_pre_exit_net") Signed-off-by: James Chapman Link: https://patch.msgid.link/20241118140411.1582555-1-jchapman@katalix.com Signed-off-by: Paolo Abeni --- net/l2tp/l2tp_core.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 3eec23ac5ab10..369a2f2e459cd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1870,15 +1870,31 @@ static __net_exit void l2tp_pre_exit_net(struct net *net) } } +static int l2tp_idr_item_unexpected(int id, void *p, void *data) +{ + const char *idr_name = data; + + pr_err("l2tp: %s IDR not empty at net %d exit\n", idr_name, id); + WARN_ON_ONCE(1); + return 1; +} + static __net_exit void l2tp_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); - WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v2_session_idr)); + /* Our per-net IDRs should be empty. Check that is so, to + * help catch cleanup races or refcnt leaks. + */ + idr_for_each(&pn->l2tp_v2_session_idr, l2tp_idr_item_unexpected, + "v2_session"); + idr_for_each(&pn->l2tp_v3_session_idr, l2tp_idr_item_unexpected, + "v3_session"); + idr_for_each(&pn->l2tp_tunnel_idr, l2tp_idr_item_unexpected, + "tunnel"); + idr_destroy(&pn->l2tp_v2_session_idr); - WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_v3_session_idr)); idr_destroy(&pn->l2tp_v3_session_idr); - WARN_ON_ONCE(!idr_is_empty(&pn->l2tp_tunnel_idr)); idr_destroy(&pn->l2tp_tunnel_idr); } From ebaf81317e42aa990ad20b113cfe3a7b20d4e937 Mon Sep 17 00:00:00 2001 From: Sidraya Jayagond Date: Tue, 19 Nov 2024 16:22:19 +0100 Subject: [PATCH 12/49] s390/iucv: MSG_PEEK causes memory leak in iucv_sock_destruct() Passing MSG_PEEK flag to skb_recv_datagram() increments skb refcount (skb->users) and iucv_sock_recvmsg() does not decrement skb refcount at exit. This results in skb memory leak in skb_queue_purge() and WARN_ON in iucv_sock_destruct() during socket close. To fix this decrease skb refcount by one if MSG_PEEK is set in order to prevent memory leak and WARN_ON. WARNING: CPU: 2 PID: 6292 at net/iucv/af_iucv.c:286 iucv_sock_destruct+0x144/0x1a0 [af_iucv] CPU: 2 PID: 6292 Comm: afiucv_test_msg Kdump: loaded Tainted: G W 6.10.0-rc7 #1 Hardware name: IBM 3931 A01 704 (z/VM 7.3.0) Call Trace: [<001587c682c4aa98>] iucv_sock_destruct+0x148/0x1a0 [af_iucv] [<001587c682c4a9d0>] iucv_sock_destruct+0x80/0x1a0 [af_iucv] [<001587c704117a32>] __sk_destruct+0x52/0x550 [<001587c704104a54>] __sock_release+0xa4/0x230 [<001587c704104c0c>] sock_close+0x2c/0x40 [<001587c702c5f5a8>] __fput+0x2e8/0x970 [<001587c7024148c4>] task_work_run+0x1c4/0x2c0 [<001587c7023b0716>] do_exit+0x996/0x1050 [<001587c7023b13aa>] do_group_exit+0x13a/0x360 [<001587c7023b1626>] __s390x_sys_exit_group+0x56/0x60 [<001587c7022bccca>] do_syscall+0x27a/0x380 [<001587c7049a6a0c>] __do_syscall+0x9c/0x160 [<001587c7049ce8a8>] system_call+0x70/0x98 Last Breaking-Event-Address: [<001587c682c4a9d4>] iucv_sock_destruct+0x84/0x1a0 [af_iucv] Fixes: eac3731bd04c ("[S390]: Add AF_IUCV socket support") Reviewed-by: Alexandra Winter Reviewed-by: Thorsten Winkler Signed-off-by: Sidraya Jayagond Signed-off-by: Alexandra Winter Reviewed-by: David Wei Link: https://patch.msgid.link/20241119152219.3712168-1-wintera@linux.ibm.com Signed-off-by: Paolo Abeni --- net/iucv/af_iucv.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c00323fa9eb66..7929df08d4e02 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1236,7 +1236,9 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, return -EOPNOTSUPP; /* receive/dequeue next skb: - * the function understands MSG_PEEK and, thus, does not dequeue skb */ + * the function understands MSG_PEEK and, thus, does not dequeue skb + * only refcount is increased. + */ skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -1252,9 +1254,8 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, cskb = skb; if (skb_copy_datagram_msg(cskb, offset, msg, copied)) { - if (!(flags & MSG_PEEK)) - skb_queue_head(&sk->sk_receive_queue, skb); - return -EFAULT; + err = -EFAULT; + goto err_out; } /* SOCK_SEQPACKET: set MSG_TRUNC if recv buf size is too small */ @@ -1271,11 +1272,8 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, err = put_cmsg(msg, SOL_IUCV, SCM_IUCV_TRGCLS, sizeof(IUCV_SKB_CB(skb)->class), (void *)&IUCV_SKB_CB(skb)->class); - if (err) { - if (!(flags & MSG_PEEK)) - skb_queue_head(&sk->sk_receive_queue, skb); - return err; - } + if (err) + goto err_out; /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { @@ -1331,8 +1329,18 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, /* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */ if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC)) copied = rlen; + if (flags & MSG_PEEK) + skb_unref(skb); return copied; + +err_out: + if (!(flags & MSG_PEEK)) + skb_queue_head(&sk->sk_receive_queue, skb); + else + skb_unref(skb); + + return err; } static inline __poll_t iucv_accept_poll(struct sock *parent) From a1f8609ff1f658e410f78d800ca947d57e51996a Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Wed, 20 Nov 2024 15:56:22 +0800 Subject: [PATCH 13/49] rtase: Refactor the rtase_check_mac_version_valid() function Different hardware requires different configurations, but this distinction was not made previously. Additionally, the error message was not clear enough. Therefore, this patch will address the issues mentioned above. Fixes: a36e9f5cfe9e ("rtase: Add support for a pci table in this module") Signed-off-by: Justin Lai Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/rtase/rtase.h | 7 ++++- .../net/ethernet/realtek/rtase/rtase_main.c | 28 +++++++++++-------- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h index 942f1e531a851..dbc3f92eebc48 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase.h +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -9,7 +9,10 @@ #ifndef RTASE_H #define RTASE_H -#define RTASE_HW_VER_MASK 0x7C800000 +#define RTASE_HW_VER_MASK 0x7C800000 +#define RTASE_HW_VER_906X_7XA 0x00800000 +#define RTASE_HW_VER_906X_7XC 0x04000000 +#define RTASE_HW_VER_907XD_V1 0x04800000 #define RTASE_RX_DMA_BURST_256 4 #define RTASE_TX_DMA_BURST_UNLIMITED 7 @@ -327,6 +330,8 @@ struct rtase_private { u16 int_nums; u16 tx_int_mit; u16 rx_int_mit; + + u32 hw_ver; }; #define RTASE_LSO_64K 64000 diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 874994d9ceb95..d4ae0dd315d3a 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1972,20 +1972,21 @@ static void rtase_init_software_variable(struct pci_dev *pdev, tp->dev->max_mtu = RTASE_MAX_JUMBO_SIZE; } -static bool rtase_check_mac_version_valid(struct rtase_private *tp) +static int rtase_check_mac_version_valid(struct rtase_private *tp) { - u32 hw_ver = rtase_r32(tp, RTASE_TX_CONFIG_0) & RTASE_HW_VER_MASK; - bool known_ver = false; + int ret = -ENODEV; - switch (hw_ver) { - case 0x00800000: - case 0x04000000: - case 0x04800000: - known_ver = true; + tp->hw_ver = rtase_r32(tp, RTASE_TX_CONFIG_0) & RTASE_HW_VER_MASK; + + switch (tp->hw_ver) { + case RTASE_HW_VER_906X_7XA: + case RTASE_HW_VER_906X_7XC: + case RTASE_HW_VER_907XD_V1: + ret = 0; break; } - return known_ver; + return ret; } static int rtase_init_board(struct pci_dev *pdev, struct net_device **dev_out, @@ -2105,9 +2106,12 @@ static int rtase_init_one(struct pci_dev *pdev, tp->pdev = pdev; /* identify chip attached to board */ - if (!rtase_check_mac_version_valid(tp)) - return dev_err_probe(&pdev->dev, -ENODEV, - "unknown chip version, contact rtase maintainers (see MAINTAINERS file)\n"); + ret = rtase_check_mac_version_valid(tp); + if (ret != 0) { + dev_err(&pdev->dev, + "unknown chip version: 0x%08x, contact rtase maintainers (see MAINTAINERS file)\n", + tp->hw_ver); + } rtase_init_software_variable(pdev, tp); rtase_init_hardware(tp); From c1fc14c4df801fe2d9ec3160b52fa63569ce164c Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Wed, 20 Nov 2024 15:56:23 +0800 Subject: [PATCH 14/49] rtase: Correct the speed for RTL907XD-V1 Previously, the reported speed was uniformly set to SPEED_5000, but the RTL907XD-V1 actually operates at a speed of SPEED_10000. Therefore, this patch makes the necessary correction. Fixes: dd7f17c40fd1 ("rtase: Implement ethtool function") Signed-off-by: Justin Lai Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index d4ae0dd315d3a..c1782e32f9c6a 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1714,10 +1714,21 @@ static int rtase_get_settings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { u32 supported = SUPPORTED_MII | SUPPORTED_Pause | SUPPORTED_Asym_Pause; + const struct rtase_private *tp = netdev_priv(dev); ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, supported); - cmd->base.speed = SPEED_5000; + + switch (tp->hw_ver) { + case RTASE_HW_VER_906X_7XA: + case RTASE_HW_VER_906X_7XC: + cmd->base.speed = SPEED_5000; + break; + case RTASE_HW_VER_907XD_V1: + cmd->base.speed = SPEED_10000; + break; + } + cmd->base.duplex = DUPLEX_FULL; cmd->base.port = PORT_MII; cmd->base.autoneg = AUTONEG_DISABLE; From a01cfcfda5cc787552b344cbc92f9c363c81ad4f Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Wed, 20 Nov 2024 15:56:24 +0800 Subject: [PATCH 15/49] rtase: Corrects error handling of the rtase_check_mac_version_valid() Previously, when the hardware version ID was determined to be invalid, only an error message was printed without any further handling. Therefore, this patch makes the necessary corrections to address this. Fixes: a36e9f5cfe9e ("rtase: Add support for a pci table in this module") Signed-off-by: Justin Lai Signed-off-by: Paolo Abeni --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index c1782e32f9c6a..de7f11232593f 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -2122,6 +2122,7 @@ static int rtase_init_one(struct pci_dev *pdev, dev_err(&pdev->dev, "unknown chip version: 0x%08x, contact rtase maintainers (see MAINTAINERS file)\n", tp->hw_ver); + goto err_out_release_board; } rtase_init_software_variable(pdev, tp); @@ -2196,6 +2197,7 @@ static int rtase_init_one(struct pci_dev *pdev, netif_napi_del(&ivec->napi); } +err_out_release_board: rtase_release_board(pdev, dev, ioaddr); return ret; From 59c5e1411a0a13ebb930f4ebba495cc4eb14f8f2 Mon Sep 17 00:00:00 2001 From: Choong Yong Liang Date: Wed, 20 Nov 2024 16:38:18 +0800 Subject: [PATCH 16/49] net: stmmac: set initial EEE policy configuration Set the initial eee_cfg values to have 'ethtool --show-eee ' display the initial EEE configuration. Fixes: 49168d1980e2 ("net: phy: Add phy_support_eee() indicating MAC support EEE") Cc: Signed-off-by: Choong Yong Liang Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20241120083818.1079456-1-yong.liang.choong@linux.intel.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3cdc3910f3a08..9b262cdad60b2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1177,6 +1177,9 @@ static int stmmac_init_phy(struct net_device *dev) return -ENODEV; } + if (priv->dma_cap.eee) + phy_support_eee(phydev); + ret = phylink_connect_phy(priv->phylink, phydev); } else { fwnode_handle_put(phy_fwnode); From 00b5b7aab9e422d00d5a9d03d7e0760a76b5d57f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 20 Nov 2024 09:51:07 +0000 Subject: [PATCH 17/49] net/ipv6: delete temporary address if mngtmpaddr is removed or unmanaged MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC8981 section 3.4 says that existing temporary addresses must have their lifetimes adjusted so that no temporary addresses should ever remain "valid" or "preferred" longer than the incoming SLAAC Prefix Information. This would strongly imply in Linux's case that if the "mngtmpaddr" address is deleted or un-flagged as such, its corresponding temporary addresses must be cleared out right away. But now the temporary address is renewed even after ‘mngtmpaddr’ is removed or becomes unmanaged as manage_tempaddrs() set temporary addresses prefered/valid time to 0, and later in addrconf_verify_rtnl() all checkings failed to remove the addresses. Fix this by deleting the temporary address directly for these situations. Fixes: 778964f2fdf0 ("ipv6/addrconf: fix timing bug in tempaddr regen") Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/ipv6/addrconf.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 96b5b2b0d5072..c489a1e6aec9a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2570,6 +2570,24 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return idev; } +static void delete_tempaddrs(struct inet6_dev *idev, + struct inet6_ifaddr *ifp) +{ + struct inet6_ifaddr *ift, *tmp; + + write_lock_bh(&idev->lock); + list_for_each_entry_safe(ift, tmp, &idev->tempaddr_list, tmp_list) { + if (ift->ifpub != ifp) + continue; + + in6_ifa_hold(ift); + write_unlock_bh(&idev->lock); + ipv6_del_addr(ift); + write_lock_bh(&idev->lock); + } + write_unlock_bh(&idev->lock); +} + static void manage_tempaddrs(struct inet6_dev *idev, struct inet6_ifaddr *ifp, __u32 valid_lft, __u32 prefered_lft, @@ -3124,11 +3142,12 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); - if (!(ifp->flags & IFA_F_TEMPORARY) && - (ifa_flags & IFA_F_MANAGETEMPADDR)) - manage_tempaddrs(idev, ifp, 0, 0, false, - jiffies); ipv6_del_addr(ifp); + + if (!(ifp->flags & IFA_F_TEMPORARY) && + (ifp->flags & IFA_F_MANAGETEMPADDR)) + delete_tempaddrs(idev, ifp); + addrconf_verify_rtnl(net); if (ipv6_addr_is_multicast(pfx)) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, @@ -4952,14 +4971,12 @@ static int inet6_addr_modify(struct net *net, struct inet6_ifaddr *ifp, } if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) { - if (was_managetempaddr && - !(ifp->flags & IFA_F_MANAGETEMPADDR)) { - cfg->valid_lft = 0; - cfg->preferred_lft = 0; - } - manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft, - cfg->preferred_lft, !was_managetempaddr, - jiffies); + if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR)) + delete_tempaddrs(ifp->idev, ifp); + else + manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft, + cfg->preferred_lft, !was_managetempaddr, + jiffies); } addrconf_verify_rtnl(net); From f6e1dcd6444485356d1df9d29df702acc7bba00c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 20 Nov 2024 09:51:08 +0000 Subject: [PATCH 18/49] selftests/rtnetlink.sh: add mngtempaddr test Add a test to check the temporary address could be added/removed correctly when mngtempaddr is set or removed/unmanaged. Signed-off-by: Sam Edwards Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/rtnetlink.sh | 95 ++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7f05b5f9b76f6..2e8243a65b507 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -29,6 +29,7 @@ ALL_TESTS=" kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding + kci_test_mngtmpaddr " devdummy="test-dummy0" @@ -44,6 +45,7 @@ check_err() if [ $ret -eq 0 ]; then ret=$1 fi + [ -n "$2" ] && echo "$2" } # same but inverted -- used when command must fail for test to pass @@ -1239,6 +1241,99 @@ kci_test_enslave_bonding() ip netns del "$testns" } +# Called to validate the addresses on $IFNAME: +# +# 1. Every `temporary` address must have a matching `mngtmpaddr` +# 2. Every `mngtmpaddr` address must have some un`deprecated` `temporary` +# +# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait +validate_mngtmpaddr() +{ + local dev=$1 + local prefix="" + local addr_list=$(ip -j -n $testns addr show dev ${dev}) + local temp_addrs=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true) | .local') + local mng_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + local undep_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + + # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr + for address in ${temp_addrs}; do + prefix=$(echo ${address} | cut -d: -f1-4) + if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!"; + return 0 + fi + done + + # 2. All mngtmpaddr addresses must have a temporary address (not dep) + for prefix in ${mng_prefixes}; do + if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: No undeprecated temporary in $prefix!"; + return 0 + fi + done + + return 1 +} + +kci_test_mngtmpaddr() +{ + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns" + return $ksft_skip + fi + + # 1. Create a dummy Ethernet interface + run_cmd ip -n $testns link add ${devdummy} type dummy + run_cmd ip -n $testns link set ${devdummy} up + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1 + + # 2. Create several mngtmpaddr addresses on that interface. + # with temp_*_lft configured to be pretty short (10 and 35 seconds + # for prefer/valid respectively) + for i in $(seq 1 9); do + run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy} + done + + # 3. Confirm that a preferred temporary address exists for each mngtmpaddr + # address at all times, polling once per second for 30 seconds. + slowwait 30 validate_mngtmpaddr ${devdummy} + + # 4. Delete each mngtmpaddr address, one at a time (alternating between + # deleting and merely un-mngtmpaddr-ing), and confirm that the other + # mngtmpaddr addresses still have preferred temporaries. + for i in $(seq 1 9); do + (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag="" + if (( $i % 2 == 0 )); then + run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy} + else + run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy} + fi + # the temp addr should be deleted + validate_mngtmpaddr ${devdummy} + done + + if [ $ret -ne 0 ]; then + end_test "FAIL: mngtmpaddr add/remove incorrect" + else + end_test "PASS: mngtmpaddr add/remove correctly" + fi + + ip netns del "$testns" + return $ret +} + kci_test_rtnl() { local current_test From 9cc8d0ecdd2aad42e377e971e3bb114339df609e Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Thu, 21 Nov 2024 11:31:52 -0800 Subject: [PATCH 19/49] net: mdio-ipq4019: add missing error check If an optional resource is found but fails to remap, return on failure. Avoids any potential problems when using the iomapped resource as the assumption is that it's available. Fixes: 23a890d493e3 ("net: mdio: Add the reset function for IPQ MDIO driver") Signed-off-by: Rosen Penev Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241121193152.8966-1-rosenp@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/mdio/mdio-ipq4019.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/mdio/mdio-ipq4019.c b/drivers/net/mdio/mdio-ipq4019.c index dd3ed2d6430b1..d9a94df482d9f 100644 --- a/drivers/net/mdio/mdio-ipq4019.c +++ b/drivers/net/mdio/mdio-ipq4019.c @@ -352,8 +352,11 @@ static int ipq4019_mdio_probe(struct platform_device *pdev) /* The platform resource is provided on the chipset IPQ5018 */ /* This resource is optional */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (res) + if (res) { priv->eth_ldo_rdy = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->eth_ldo_rdy)) + return PTR_ERR(priv->eth_ldo_rdy); + } bus->name = "ipq4019_mdio"; bus->read = ipq4019_mdio_read_c22; From b032ae57d4fe2b2445e3bc190db6fcaa8c102f68 Mon Sep 17 00:00:00 2001 From: Vitalii Mordan Date: Thu, 21 Nov 2024 23:06:58 +0300 Subject: [PATCH 20/49] marvell: pxa168_eth: fix call balance of pep->clk handling routines If the clock pep->clk was not enabled in pxa168_eth_probe, it should not be disabled in any path. Conversely, if it was enabled in pxa168_eth_probe, it must be disabled in all error paths to ensure proper cleanup. Use the devm_clk_get_enabled helper function to ensure proper call balance for pep->clk. Found by Linux Verification Center (linuxtesting.org) with Klever. Fixes: a49f37eed22b ("net: add Fast Ethernet driver for PXA168.") Signed-off-by: Vitalii Mordan Link: https://patch.msgid.link/20241121200658.2203871-1-mordan@ispras.ru Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/pxa168_eth.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index fe38426ec42d4..2bf426cea6ddc 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -1394,18 +1394,15 @@ static int pxa168_eth_probe(struct platform_device *pdev) printk(KERN_NOTICE "PXA168 10/100 Ethernet Driver\n"); - clk = devm_clk_get(&pdev->dev, NULL); + clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(clk)) { - dev_err(&pdev->dev, "Fast Ethernet failed to get clock\n"); + dev_err(&pdev->dev, "Fast Ethernet failed to get and enable clock\n"); return -ENODEV; } - clk_prepare_enable(clk); dev = alloc_etherdev(sizeof(struct pxa168_eth_private)); - if (!dev) { - err = -ENOMEM; - goto err_clk; - } + if (!dev) + return -ENOMEM; platform_set_drvdata(pdev, dev); pep = netdev_priv(dev); @@ -1523,8 +1520,6 @@ static int pxa168_eth_probe(struct platform_device *pdev) mdiobus_free(pep->smi_bus); err_netdev: free_netdev(dev); -err_clk: - clk_disable_unprepare(clk); return err; } @@ -1542,7 +1537,6 @@ static void pxa168_eth_remove(struct platform_device *pdev) if (dev->phydev) phy_disconnect(dev->phydev); - clk_disable_unprepare(pep->clk); mdiobus_unregister(pep->smi_bus); mdiobus_free(pep->smi_bus); unregister_netdev(dev); From 407618d66dba55e7db1278872e8be106808bbe91 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Fri, 22 Nov 2024 15:12:55 +0100 Subject: [PATCH 21/49] net: stmmac: dwmac-socfpga: Set RX watchdog interrupt as broken On DWMAC3 and later, there's a RX Watchdog interrupt that's used for interrupt coalescing. It's known to be buggy on some platforms, and dwmac-socfpga appears to be one of them. Changing the interrupt coalescing from ethtool doesn't appear to have any effect here. Without disabling RIWT (Received Interrupt Watchdog Timer, I believe...), we observe latencies while receiving traffic that amount to around ~0.4ms. This was discovered with NTP but can be easily reproduced with a simple ping. Without this patch : 64 bytes from 192.168.5.2: icmp_seq=1 ttl=64 time=0.657 ms With this patch : 64 bytes from 192.168.5.2: icmp_seq=1 ttl=64 time=0.254 ms Fixes: 801d233b7302 ("net: stmmac: Add SOCFPGA glue driver") Signed-off-by: Maxime Chevallier Link: https://patch.msgid.link/20241122141256.764578-1-maxime.chevallier@bootlin.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 248b30d7b864b..16020b72dec83 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -487,6 +487,8 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) plat_dat->select_pcs = socfpga_dwmac_select_pcs; plat_dat->has_gmac = true; + plat_dat->riwt_off = 1; + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) return ret; From 7ebbbb23ea5b6d051509cb11399afac5042c9266 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:31 +0530 Subject: [PATCH 22/49] octeontx2-af: RPM: Fix mismatch in lmac type Due to a bug in the previous patch, there is a mismatch between the lmac type reported by the driver and the actual hardware configuration. Fixes: 3ad3f8f93c81 ("octeontx2-af: cn10k: MAC internal loopback support") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 1b34cf9c97035..9e8c5e4389f8b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -467,7 +467,7 @@ u8 rpm_get_lmac_type(void *rpmd, int lmac_id) int err; req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_LINK_STS, req); - err = cgx_fwi_cmd_generic(req, &resp, rpm, 0); + err = cgx_fwi_cmd_generic(req, &resp, rpm, lmac_id); if (!err) return FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, resp); return err; From d1e8884e050c1255a9ceb477f5ff926ee9214a23 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:32 +0530 Subject: [PATCH 23/49] octeontx2-af: RPM: Fix low network performance Low network performance is observed even on RPMs with larger FIFO lengths. The cn10kb silicon has three RPM blocks with the following FIFO sizes: -------------------- | RPM0 | 256KB | | RPM1 | 256KB | | RPM2 | 128KB | -------------------- The current design stores the FIFO length in a common structure for all RPMs (mac_ops). As a result, the FIFO length of the last RPM is applied to all RPMs, leading to reduced network performance. This patch resolved the problem by storing the fifo length in per MAC structure (cgx). Fixes: b9d0fedc6234 ("octeontx2-af: cn10kb: Add RPM_USX MAC support") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 9 +++++++-- drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 + drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h | 5 ++++- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 6 +++--- drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c | 9 ++++----- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 27935c54b91bc..2f621714c54e6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -112,6 +112,11 @@ struct mac_ops *get_mac_ops(void *cgxd) return ((struct cgx *)cgxd)->mac_ops; } +u32 cgx_get_fifo_len(void *cgxd) +{ + return ((struct cgx *)cgxd)->fifo_len; +} + void cgx_write(struct cgx *cgx, u64 lmac, u64 offset, u64 val) { writeq(val, cgx->reg_base + (lmac << cgx->mac_ops->lmac_offset) + @@ -501,7 +506,7 @@ static u32 cgx_get_lmac_fifo_len(void *cgxd, int lmac_id) u8 num_lmacs; u32 fifo_len; - fifo_len = cgx->mac_ops->fifo_len; + fifo_len = cgx->fifo_len; num_lmacs = cgx->mac_ops->get_nr_lmacs(cgx); switch (num_lmacs) { @@ -1764,7 +1769,7 @@ static void cgx_populate_features(struct cgx *cgx) u64 cfg; cfg = cgx_read(cgx, 0, CGX_CONST); - cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg); + cgx->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg); cgx->max_lmac_per_mac = FIELD_GET(CGX_CONST_MAX_LMACS, cfg); if (is_dev_rpm(cgx)) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index dc9ace30554af..f9cd4b58f0c02 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -185,4 +185,5 @@ int cgx_lmac_get_pfc_frm_cfg(void *cgxd, int lmac_id, u8 *tx_pause, int verify_lmac_fc_cfg(void *cgxd, int lmac_id, u8 tx_pause, u8 rx_pause, int pfvf_idx); int cgx_lmac_reset(void *cgxd, int lmac_id, u8 pf_req_flr); +u32 cgx_get_fifo_len(void *cgxd); #endif /* CGX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 9ffc6790c5130..c43ff68ef1408 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -72,7 +72,6 @@ struct mac_ops { u8 irq_offset; u8 int_ena_bit; u8 lmac_fwi; - u32 fifo_len; bool non_contiguous_serdes_lane; /* RPM & CGX differs in number of Receive/transmit stats */ u8 rx_stats_cnt; @@ -142,6 +141,10 @@ struct cgx { u8 lmac_count; /* number of LMACs per MAC could be 4 or 8 */ u8 max_lmac_per_mac; + /* length of fifo varies depending on the number + * of LMACS + */ + u32 fifo_len; #define MAX_LMAC_COUNT 8 struct lmac *lmac_idmap[MAX_LMAC_COUNT]; struct work_struct cgx_cmd_work; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 9e8c5e4389f8b..22dd50a3fcd3a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -480,7 +480,7 @@ u32 rpm_get_lmac_fifo_len(void *rpmd, int lmac_id) u8 num_lmacs; u32 fifo_len; - fifo_len = rpm->mac_ops->fifo_len; + fifo_len = rpm->fifo_len; num_lmacs = rpm->mac_ops->get_nr_lmacs(rpm); switch (num_lmacs) { @@ -533,9 +533,9 @@ u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) */ max_lmac = (rpm_read(rpm, 0, CGX_CONST) >> 24) & 0xFF; if (max_lmac > 4) - fifo_len = rpm->mac_ops->fifo_len / 2; + fifo_len = rpm->fifo_len / 2; else - fifo_len = rpm->mac_ops->fifo_len; + fifo_len = rpm->fifo_len; if (lmac_id < 4) { num_lmacs = hweight8(lmac_info & 0xF); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 266ecbc1b97a6..4dcd7bfcad4e4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -923,13 +923,12 @@ int rvu_mbox_handler_cgx_features_get(struct rvu *rvu, u32 rvu_cgx_get_fifolen(struct rvu *rvu) { - struct mac_ops *mac_ops; - u32 fifo_len; + void *cgxd = rvu_first_cgx_pdata(rvu); - mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); - fifo_len = mac_ops ? mac_ops->fifo_len : 0; + if (!cgxd) + return 0; - return fifo_len; + return cgx_get_fifo_len(cgxd); } u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac) From 07cd1eb166a3fa7244afa74d48bd13c9df7c559d Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:33 +0530 Subject: [PATCH 24/49] octeontx2-af: RPM: fix stale RSFEC counters The earlier patch sets the 'Stats control register' for RPM receive/transmit statistics instead of RSFEC statistics, causing the driver to return stale FEC counters. Fixes: 84ad3642115d ("octeontx2-af: Add FEC stats for RPM/RPM_USX block") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/rpm.c | 13 +++++++++---- drivers/net/ethernet/marvell/octeontx2/af/rpm.h | 4 +++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 22dd50a3fcd3a..70629f94c27ef 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -699,6 +699,10 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_NONE) return 0; + /* latched registers FCFECX_CW_HI/RSFEC_STAT_FAST_DATA_HI_CDC are common + * for all counters. Acquire lock to ensure serialized reads + */ + mutex_lock(&rpm->lock); if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) { val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_CCW_LO); val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); @@ -725,20 +729,21 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) } } else { /* enable RS-FEC capture */ - cfg = rpm_read(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL); + cfg = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_STATN_CONTROL); cfg |= RPMX_RSFEC_RX_CAPTURE | BIT(lmac_id); - rpm_write(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL, cfg); + rpm_write(rpm, 0, RPMX_MTI_RSFEC_STAT_STATN_CONTROL, cfg); val_lo = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2); - val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + val_hi = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC); rsp->fec_corr_blks = (val_hi << 32 | val_lo); val_lo = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3); - val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + val_hi = rpm_read(rpm, 0, RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC); rsp->fec_uncorr_blks = (val_hi << 32 | val_lo); } + mutex_unlock(&rpm->lock); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 34b11deb0f3c1..a5773fbacaff8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -84,9 +84,11 @@ /* FEC stats */ #define RPMX_MTI_STAT_STATN_CONTROL 0x10018 #define RPMX_MTI_STAT_DATA_HI_CDC 0x10038 -#define RPMX_RSFEC_RX_CAPTURE BIT_ULL(27) +#define RPMX_RSFEC_RX_CAPTURE BIT_ULL(28) #define RPMX_CMD_CLEAR_RX BIT_ULL(30) #define RPMX_CMD_CLEAR_TX BIT_ULL(31) +#define RPMX_MTI_RSFEC_STAT_STATN_CONTROL 0x40018 +#define RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC 0x40000 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2 0x40050 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3 0x40058 #define RPMX_MTI_FCFECX_VL0_CCW_LO 0x38618 From 6fc2164108462b913a1290fa2c44054c70b060ef Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:34 +0530 Subject: [PATCH 25/49] octeontx2-af: RPM: fix stale FCFEC counters The corrected words register(FCFECX_VL0_CCW_LO)/Uncorrected words register (FCFECX_VL0_NCCW_LO) of FCFEC counter has different LMAC offset which needs to be accessed differently. Fixes: 84ad3642115d ("octeontx2-af: Add FEC stats for RPM/RPM_USX block") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/rpm.c | 24 +++++++++---------- .../net/ethernet/marvell/octeontx2/af/rpm.h | 10 ++++---- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index 70629f94c27ef..e97fcc51d7f24 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -704,27 +704,27 @@ int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) */ mutex_lock(&rpm->lock); if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) { - val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_CCW_LO); - val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, RPMX_MTI_FCFECX_VL0_CCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_corr_blks = (val_hi << 16 | val_lo); - val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_NCCW_LO); - val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, RPMX_MTI_FCFECX_VL0_NCCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_uncorr_blks = (val_hi << 16 | val_lo); /* 50G uses 2 Physical serdes lines */ if (rpm->lmac_idmap[lmac_id]->link_info.lmac_type_id == LMAC_MODE_50G_R) { - val_lo = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_VL1_CCW_LO); - val_hi = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_VL1_CCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_corr_blks += (val_hi << 16 | val_lo); - val_lo = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_VL1_NCCW_LO); - val_hi = rpm_read(rpm, lmac_id, - RPMX_MTI_FCFECX_CW_HI); + val_lo = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_VL1_NCCW_LO(lmac_id)); + val_hi = rpm_read(rpm, 0, + RPMX_MTI_FCFECX_CW_HI(lmac_id)); rsp->fec_uncorr_blks += (val_hi << 16 | val_lo); } } else { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index a5773fbacaff8..5194fec4c3b8e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -91,11 +91,11 @@ #define RPMX_MTI_RSFEC_STAT_FAST_DATA_HI_CDC 0x40000 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2 0x40050 #define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3 0x40058 -#define RPMX_MTI_FCFECX_VL0_CCW_LO 0x38618 -#define RPMX_MTI_FCFECX_VL0_NCCW_LO 0x38620 -#define RPMX_MTI_FCFECX_VL1_CCW_LO 0x38628 -#define RPMX_MTI_FCFECX_VL1_NCCW_LO 0x38630 -#define RPMX_MTI_FCFECX_CW_HI 0x38638 +#define RPMX_MTI_FCFECX_VL0_CCW_LO(a) (0x38618 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_VL0_NCCW_LO(a) (0x38620 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_VL1_CCW_LO(a) (0x38628 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_VL1_NCCW_LO(a) (0x38630 + ((a) * 0x40)) +#define RPMX_MTI_FCFECX_CW_HI(a) (0x38638 + ((a) * 0x40)) /* CN10KB CSR Declaration */ #define RPM2_CMRX_SW_INT 0x1b0 From 762ca6eed026346d9d41ed5ac633083c4f1e5071 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Fri, 22 Nov 2024 21:50:35 +0530 Subject: [PATCH 26/49] octeontx2-af: Quiesce traffic before NIX block reset During initialization, the AF driver resets all blocks. The RPM (MAC) block and NIX block operate on a credit-based model. When the NIX block resets during active traffic flow, it doesn't release credits to the RPM block. This causes the RPM FIFO to overflow, leading to receive traffic struck. To address this issue, the patch introduces the following changes: 1. Stop receiving traffic at the MAC level during AF driver initialization. 2. Perform an X2P reset (prevents RXFIFO of all LMACS from pushing data) 3. Reset the NIX block. 4. Clear the X2P reset and re-enable receiving traffic. Fixes: 54d557815e15 ("octeontx2-af: Reset all RVU blocks") Signed-off-by: Hariprasad Kelam Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/cgx.c | 61 +++++++++++++++++++ .../net/ethernet/marvell/octeontx2/af/cgx.h | 4 ++ .../marvell/octeontx2/af/lmac_common.h | 2 + .../net/ethernet/marvell/octeontx2/af/rpm.c | 42 +++++++++++++ .../net/ethernet/marvell/octeontx2/af/rpm.h | 4 ++ .../net/ethernet/marvell/octeontx2/af/rvu.c | 1 + .../net/ethernet/marvell/octeontx2/af/rvu.h | 1 + .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 36 +++++++++-- 8 files changed, 145 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 2f621714c54e6..8216f843a7cd5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -214,6 +214,24 @@ u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id) return (cfg & CMR_P2X_SEL_MASK) >> CMR_P2X_SEL_SHIFT; } +static u8 cgx_get_nix_resetbit(struct cgx *cgx) +{ + int first_lmac; + u8 p2x; + + /* non 98XX silicons supports only NIX0 block */ + if (cgx->pdev->subsystem_device != PCI_SUBSYS_DEVID_98XX) + return CGX_NIX0_RESET; + + first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac); + p2x = cgx_lmac_get_p2x(cgx->cgx_id, first_lmac); + + if (p2x == CMR_P2X_SEL_NIX1) + return CGX_NIX1_RESET; + else + return CGX_NIX0_RESET; +} + /* Ensure the required lock for event queue(where asynchronous events are * posted) is acquired before calling this API. Else an asynchronous event(with * latest link status) can reach the destination before this function returns @@ -1724,6 +1742,8 @@ static int cgx_lmac_init(struct cgx *cgx) lmac->lmac_type = cgx->mac_ops->get_lmac_type(cgx, lmac->lmac_id); } + /* Start X2P reset on given MAC block */ + cgx->mac_ops->mac_x2p_reset(cgx, true); return cgx_lmac_verify_fwi_version(cgx); err_bitmap_free: @@ -1789,6 +1809,45 @@ static u8 cgx_get_rxid_mapoffset(struct cgx *cgx) return 0x60; } +static void cgx_x2p_reset(void *cgxd, bool enable) +{ + struct cgx *cgx = cgxd; + int lmac_id; + u64 cfg; + + if (enable) { + for_each_set_bit(lmac_id, &cgx->lmac_bmap, cgx->max_lmac_per_mac) + cgx->mac_ops->mac_enadis_rx(cgx, lmac_id, false); + + usleep_range(1000, 2000); + + cfg = cgx_read(cgx, 0, CGXX_CMR_GLOBAL_CONFIG); + cfg |= cgx_get_nix_resetbit(cgx) | CGX_NSCI_DROP; + cgx_write(cgx, 0, CGXX_CMR_GLOBAL_CONFIG, cfg); + } else { + cfg = cgx_read(cgx, 0, CGXX_CMR_GLOBAL_CONFIG); + cfg &= ~(cgx_get_nix_resetbit(cgx) | CGX_NSCI_DROP); + cgx_write(cgx, 0, CGXX_CMR_GLOBAL_CONFIG, cfg); + } +} + +static int cgx_enadis_rx(void *cgxd, int lmac_id, bool enable) +{ + struct cgx *cgx = cgxd; + u64 cfg; + + if (!is_lmac_valid(cgx, lmac_id)) + return -ENODEV; + + cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); + if (enable) + cfg |= DATA_PKT_RX_EN; + else + cfg &= ~DATA_PKT_RX_EN; + cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); + return 0; +} + static struct mac_ops cgx_mac_ops = { .name = "cgx", .csr_offset = 0, @@ -1820,6 +1879,8 @@ static struct mac_ops cgx_mac_ops = { .mac_get_pfc_frm_cfg = cgx_lmac_get_pfc_frm_cfg, .mac_reset = cgx_lmac_reset, .mac_stats_reset = cgx_stats_reset, + .mac_x2p_reset = cgx_x2p_reset, + .mac_enadis_rx = cgx_enadis_rx, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index f9cd4b58f0c02..1cf12e5c7da87 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -32,6 +32,10 @@ #define CGX_LMAC_TYPE_MASK 0xF #define CGXX_CMRX_INT 0x040 #define FW_CGX_INT BIT_ULL(1) +#define CGXX_CMR_GLOBAL_CONFIG 0x08 +#define CGX_NIX0_RESET BIT_ULL(2) +#define CGX_NIX1_RESET BIT_ULL(3) +#define CGX_NSCI_DROP BIT_ULL(9) #define CGXX_CMRX_INT_ENA_W1S 0x058 #define CGXX_CMRX_RX_ID_MAP 0x060 #define CGXX_CMRX_RX_STAT0 0x070 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index c43ff68ef1408..6180e68e1765a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -132,6 +132,8 @@ struct mac_ops { int (*get_fec_stats)(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); int (*mac_stats_reset)(void *cgxd, int lmac_id); + void (*mac_x2p_reset)(void *cgxd, bool enable); + int (*mac_enadis_rx)(void *cgxd, int lmac_id, bool enable); }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index e97fcc51d7f24..2e9945446199e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -39,6 +39,8 @@ static struct mac_ops rpm_mac_ops = { .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, .mac_reset = rpm_lmac_reset, .mac_stats_reset = rpm_stats_reset, + .mac_x2p_reset = rpm_x2p_reset, + .mac_enadis_rx = rpm_enadis_rx, }; static struct mac_ops rpm2_mac_ops = { @@ -72,6 +74,8 @@ static struct mac_ops rpm2_mac_ops = { .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, .mac_reset = rpm_lmac_reset, .mac_stats_reset = rpm_stats_reset, + .mac_x2p_reset = rpm_x2p_reset, + .mac_enadis_rx = rpm_enadis_rx, }; bool is_dev_rpm2(void *rpmd) @@ -768,3 +772,41 @@ int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr) return 0; } + +void rpm_x2p_reset(void *rpmd, bool enable) +{ + rpm_t *rpm = rpmd; + int lmac_id; + u64 cfg; + + if (enable) { + for_each_set_bit(lmac_id, &rpm->lmac_bmap, rpm->max_lmac_per_mac) + rpm->mac_ops->mac_enadis_rx(rpm, lmac_id, false); + + usleep_range(1000, 2000); + + cfg = rpm_read(rpm, 0, RPMX_CMR_GLOBAL_CFG); + rpm_write(rpm, 0, RPMX_CMR_GLOBAL_CFG, cfg | RPM_NIX0_RESET); + } else { + cfg = rpm_read(rpm, 0, RPMX_CMR_GLOBAL_CFG); + cfg &= ~RPM_NIX0_RESET; + rpm_write(rpm, 0, RPMX_CMR_GLOBAL_CFG, cfg); + } +} + +int rpm_enadis_rx(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + if (enable) + cfg |= RPM_RX_EN; + else + cfg &= ~RPM_RX_EN; + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 5194fec4c3b8e..b8d3972e096ae 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -17,6 +17,8 @@ /* Registers */ #define RPMX_CMRX_CFG 0x00 +#define RPMX_CMR_GLOBAL_CFG 0x08 +#define RPM_NIX0_RESET BIT_ULL(3) #define RPMX_RX_TS_PREPEND BIT_ULL(22) #define RPMX_TX_PTP_1S_SUPPORT BIT_ULL(17) #define RPMX_CMRX_RX_ID_MAP 0x80 @@ -139,4 +141,6 @@ bool is_dev_rpm2(void *rpmd); int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); int rpm_lmac_reset(void *rpmd, int lmac_id, u8 pf_req_flr); int rpm_stats_reset(void *rpmd, int lmac_id); +void rpm_x2p_reset(void *rpmd, bool enable); +int rpm_enadis_rx(void *rpmd, int lmac_id, bool enable); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 1a97fb9032fa4..cd0d7b7774f1a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1162,6 +1162,7 @@ static int rvu_setup_hw_resources(struct rvu *rvu) } rvu_program_channels(rvu); + cgx_start_linkup(rvu); err = rvu_mcs_init(rvu); if (err) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index b897845e25fd3..a383b5ef5b2d8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -1025,6 +1025,7 @@ int rvu_cgx_prio_flow_ctrl_cfg(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_ int rvu_cgx_cfg_pause_frm(struct rvu *rvu, u16 pcifunc, u8 tx_pause, u8 rx_pause); void rvu_mac_reset(struct rvu *rvu, u16 pcifunc); u32 rvu_cgx_get_lmac_fifolen(struct rvu *rvu, int cgx, int lmac); +void cgx_start_linkup(struct rvu *rvu); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 4dcd7bfcad4e4..992fa0b82e8d2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -349,6 +349,7 @@ static void rvu_cgx_wq_destroy(struct rvu *rvu) int rvu_cgx_init(struct rvu *rvu) { + struct mac_ops *mac_ops; int cgx, err; void *cgxd; @@ -375,6 +376,15 @@ int rvu_cgx_init(struct rvu *rvu) if (err) return err; + /* Clear X2P reset on all MAC blocks */ + for (cgx = 0; cgx < rvu->cgx_cnt_max; cgx++) { + cgxd = rvu_cgx_pdata(cgx, rvu); + if (!cgxd) + continue; + mac_ops = get_mac_ops(cgxd); + mac_ops->mac_x2p_reset(cgxd, false); + } + /* Register for CGX events */ err = cgx_lmac_event_handler_init(rvu); if (err) @@ -382,10 +392,26 @@ int rvu_cgx_init(struct rvu *rvu) mutex_init(&rvu->cgx_cfg_lock); - /* Ensure event handler registration is completed, before - * we turn on the links - */ - mb(); + return 0; +} + +void cgx_start_linkup(struct rvu *rvu) +{ + unsigned long lmac_bmap; + struct mac_ops *mac_ops; + int cgx, lmac, err; + void *cgxd; + + /* Enable receive on all LMACS */ + for (cgx = 0; cgx <= rvu->cgx_cnt_max; cgx++) { + cgxd = rvu_cgx_pdata(cgx, rvu); + if (!cgxd) + continue; + mac_ops = get_mac_ops(cgxd); + lmac_bmap = cgx_get_lmac_bmap(cgxd); + for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx) + mac_ops->mac_enadis_rx(cgxd, lmac, true); + } /* Do link up for all CGX ports */ for (cgx = 0; cgx <= rvu->cgx_cnt_max; cgx++) { @@ -398,8 +424,6 @@ int rvu_cgx_init(struct rvu *rvu) "Link up process failed to start on cgx %d\n", cgx); } - - return 0; } int rvu_cgx_exit(struct rvu *rvu) From 9cfb5e7f0ded2bfaabc270ceb5f91d13f0e805b9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Nov 2024 17:13:43 +0000 Subject: [PATCH 27/49] net: hsr: fix hsr_init_sk() vs network/transport headers. Following sequence in hsr_init_sk() is invalid : skb_reset_mac_header(skb); skb_reset_mac_len(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); It is invalid because skb_reset_mac_len() needs the correct network header, which should be after the mac header. This patch moves the skb_reset_network_header() and skb_reset_transport_header() before the call to dev_hard_header(). As a result skb->mac_len is no longer set to a value close to 65535. Fixes: 48b491a5cc74 ("net: hsr: fix mac_len checks") Signed-off-by: Eric Dumazet Cc: George McCollister Link: https://patch.msgid.link/20241122171343.897551-1-edumazet@google.com Signed-off-by: Paolo Abeni --- net/hsr/hsr_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 9e64496a5c1c8..31a416ee21ad9 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -268,6 +268,8 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master) skb->dev = master->dev; skb->priority = TC_PRIO_CONTROL; + skb_reset_network_header(skb); + skb_reset_transport_header(skb); if (dev_hard_header(skb, skb->dev, ETH_P_PRP, hsr->sup_multicast_addr, skb->dev->dev_addr, skb->len) <= 0) @@ -275,8 +277,6 @@ static struct sk_buff *hsr_init_skb(struct hsr_port *master) skb_reset_mac_header(skb); skb_reset_mac_len(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); return skb; out: From 5311598f7f3293683cdc761df71ae3469327332c Mon Sep 17 00:00:00 2001 From: Saravanan Vajravel Date: Fri, 22 Nov 2024 14:45:41 -0800 Subject: [PATCH 28/49] bnxt_en: Reserve rings after PCIe AER recovery if NIC interface is down After successful PCIe AER recovery, FW will reset all resource reservations. If it is IF_UP, the driver will call bnxt_open() and all resources will be reserved again. It it is IF_DOWN, we should call bnxt_reserve_rings() so that we can reserve resources including RoCE resources to allow RoCE to resume after AER. Without this patch, RoCE fails to resume in this IF_DOWN scenario. Later, if it becomes IF_UP, bnxt_open() will see that resources have been reserved and will not reserve again. Fixes: fb1e6e562b37 ("bnxt_en: Fix AER recovery.") Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Reviewed-by: Kashyap Desai Signed-off-by: Saravanan Vajravel Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 5f7bdafcf05de..3eeaceb3ff383 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16478,8 +16478,12 @@ static void bnxt_io_resume(struct pci_dev *pdev) rtnl_lock(); err = bnxt_hwrm_func_qcaps(bp); - if (!err && netif_running(netdev)) - err = bnxt_open(netdev); + if (!err) { + if (netif_running(netdev)) + err = bnxt_open(netdev); + else + err = bnxt_reserve_rings(bp, true); + } if (!err) netif_device_attach(netdev); From 5007991670941c132fb3bc0484c009cf4bcea30f Mon Sep 17 00:00:00 2001 From: Shravya KN Date: Fri, 22 Nov 2024 14:45:42 -0800 Subject: [PATCH 29/49] bnxt_en: Set backplane link modes correctly for ethtool Use the return value from bnxt_get_media() to determine the port and link modes. bnxt_get_media() returns the proper BNXT_MEDIA_KR when the PHY is backplane. This will correct the ethtool settings for backplane devices. Fixes: 5d4e1bf60664 ("bnxt_en: extend media types to supported and autoneg modes") Reviewed-by: Somnath Kotur Signed-off-by: Shravya KN Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 2f4987ec74643..f1f6bb328a55b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2837,19 +2837,24 @@ static int bnxt_get_link_ksettings(struct net_device *dev, } base->port = PORT_NONE; - if (link_info->media_type == PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP) { + if (media == BNXT_MEDIA_TP) { base->port = PORT_TP; linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, lk_ksettings->link_modes.supported); linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, lk_ksettings->link_modes.advertising); + } else if (media == BNXT_MEDIA_KR) { + linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, + lk_ksettings->link_modes.supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, + lk_ksettings->link_modes.advertising); } else { linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, lk_ksettings->link_modes.supported); linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, lk_ksettings->link_modes.advertising); - if (link_info->media_type == PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC) + if (media == BNXT_MEDIA_CR) base->port = PORT_DA; else base->port = PORT_FIBRE; From 5ac066b7b062ee753a14557ea11bdc62364c8090 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Fri, 22 Nov 2024 14:45:43 -0800 Subject: [PATCH 30/49] bnxt_en: Fix queue start to update vnic RSS table HWRM_RING_FREE followed by a HWRM_RING_ALLOC is not guaranteed to have the same FW ring ID as before. So we must reinitialize the RSS table with the correct ring IDs. Otherwise, traffic may not resume properly if the restarted ring ID is stale. Since this feature is only supported on P5_PLUS chips, we call bnxt_vnic_set_rss_p5() to update the HW RSS table. Fixes: 2d694c27d32e ("bnxt_en: implement netdev_queue_mgmt_ops") Cc: David Wei Reviewed-by: Kalesh AP Reviewed-by: Andy Gospodarek Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3eeaceb3ff383..3bee485b50f0e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -15477,6 +15477,13 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) for (i = 0; i <= BNXT_VNIC_NTUPLE; i++) { vnic = &bp->vnic_info[i]; + + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; bnxt_hwrm_vnic_update(bp, vnic, VNIC_UPDATE_REQ_ENABLES_MRU_VALID); From 3051a77a09dfe3022aa012071346937fdf059033 Mon Sep 17 00:00:00 2001 From: Shravya KN Date: Fri, 22 Nov 2024 14:45:44 -0800 Subject: [PATCH 31/49] bnxt_en: Fix receive ring space parameters when XDP is active The MTU setting at the time an XDP multi-buffer is attached determines whether the aggregation ring will be used and the rx_skb_func handler. This is done in bnxt_set_rx_skb_mode(). If the MTU is later changed, the aggregation ring setting may need to be changed and it may become out-of-sync with the settings initially done in bnxt_set_rx_skb_mode(). This may result in random memory corruption and crashes as the HW may DMA data larger than the allocated buffer size, such as: BUG: kernel NULL pointer dereference, address: 00000000000003c0 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 17 PID: 0 Comm: swapper/17 Kdump: loaded Tainted: G S OE 6.1.0-226bf9805506 #1 Hardware name: Wiwynn Delta Lake PVT BZA.02601.0150/Delta Lake-Class1, BIOS F0E_3A12 08/26/2021 RIP: 0010:bnxt_rx_pkt+0xe97/0x1ae0 [bnxt_en] Code: 8b 95 70 ff ff ff 4c 8b 9d 48 ff ff ff 66 41 89 87 b4 00 00 00 e9 0b f7 ff ff 0f b7 43 0a 49 8b 95 a8 04 00 00 25 ff 0f 00 00 <0f> b7 14 42 48 c1 e2 06 49 03 95 a0 04 00 00 0f b6 42 33f RSP: 0018:ffffa19f40cc0d18 EFLAGS: 00010202 RAX: 00000000000001e0 RBX: ffff8e2c805c6100 RCX: 00000000000007ff RDX: 0000000000000000 RSI: ffff8e2c271ab990 RDI: ffff8e2c84f12380 RBP: ffffa19f40cc0e48 R08: 000000000001000d R09: 974ea2fcddfa4cbf R10: 0000000000000000 R11: ffffa19f40cc0ff8 R12: ffff8e2c94b58980 R13: ffff8e2c952d6600 R14: 0000000000000016 R15: ffff8e2c271ab990 FS: 0000000000000000(0000) GS:ffff8e3b3f840000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000003c0 CR3: 0000000e8580a004 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __bnxt_poll_work+0x1c2/0x3e0 [bnxt_en] To address the issue, we now call bnxt_set_rx_skb_mode() within bnxt_change_mtu() to properly set the AGG rings configuration and update rx_skb_func based on the new MTU value. Additionally, BNXT_FLAG_NO_AGG_RINGS is cleared at the beginning of bnxt_set_rx_skb_mode() to make sure it gets set or cleared based on the current MTU. Fixes: 08450ea98ae9 ("bnxt_en: Fix max_mtu setting for multi-buf XDP") Co-developed-by: Somnath Kotur Signed-off-by: Somnath Kotur Signed-off-by: Shravya KN Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3bee485b50f0e..b7541156fe46a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4661,7 +4661,7 @@ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) struct net_device *dev = bp->dev; if (page_mode) { - bp->flags &= ~BNXT_FLAG_AGG_RINGS; + bp->flags &= ~(BNXT_FLAG_AGG_RINGS | BNXT_FLAG_NO_AGG_RINGS); bp->flags |= BNXT_FLAG_RX_PAGE_MODE; if (bp->xdp_prog->aux->xdp_has_frags) @@ -14740,6 +14740,14 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) bnxt_close_nic(bp, true, false); WRITE_ONCE(dev->mtu, new_mtu); + + /* MTU change may change the AGG ring settings if an XDP multi-buffer + * program is attached. We need to set the AGG rings settings and + * rx_skb_func accordingly. + */ + if (READ_ONCE(bp->xdp_prog)) + bnxt_set_rx_skb_mode(bp, true); + bnxt_set_ring_params(bp); if (netif_running(dev)) From 1e9614cd956268e10a669c0593e7e54d03d0c087 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 22 Nov 2024 14:45:45 -0800 Subject: [PATCH 32/49] bnxt_en: Refactor bnxt_ptp_init() Instead of passing the 2nd parameter phc_cfg to bnxt_ptp_init(). Store it in bp->ptp_cfg so that the caller doesn't need to know what the value should be. In the next patch, we'll need to call bnxt_ptp_init() in bnxt_resume() and this will make it easier. Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b7541156fe46a..9c4b8ea22bf94 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9296,7 +9296,6 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) struct hwrm_port_mac_ptp_qcfg_output *resp; struct hwrm_port_mac_ptp_qcfg_input *req; struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; - bool phc_cfg; u8 flags; int rc; @@ -9343,8 +9342,9 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) rc = -ENODEV; goto exit; } - phc_cfg = (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0; - rc = bnxt_ptp_init(bp, phc_cfg); + ptp->rtc_configured = + (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_RTC_CONFIGURED) != 0; + rc = bnxt_ptp_init(bp); if (rc) netdev_warn(bp->dev, "PTP initialization failed.\n"); exit: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index 075ccd589845c..2d4e19b96ee74 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -1038,7 +1038,7 @@ static void bnxt_ptp_free(struct bnxt *bp) } } -int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) +int bnxt_ptp_init(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; int rc; @@ -1061,7 +1061,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) if (BNXT_PTP_USE_RTC(bp)) { bnxt_ptp_timecounter_init(bp, false); - rc = bnxt_ptp_init_rtc(bp, phc_cfg); + rc = bnxt_ptp_init_rtc(bp, ptp->rtc_configured); if (rc) goto out; } else { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index c7851f8c971c5..a95f05e9c579b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -135,6 +135,7 @@ struct bnxt_ptp_cfg { BNXT_PTP_MSG_PDELAY_REQ | \ BNXT_PTP_MSG_PDELAY_RESP) u8 tx_tstamp_en:1; + u8 rtc_configured:1; int rx_filter; u32 tstamp_filters; @@ -168,7 +169,7 @@ void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi, struct tx_ts_cmp *tscmp); void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns); int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg); -int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg); +int bnxt_ptp_init(struct bnxt *bp); void bnxt_ptp_clear(struct bnxt *bp); static inline u64 bnxt_timecounter_cyc2time(struct bnxt_ptp_cfg *ptp, u64 ts) { From 3661c05c54e8db7064aa96a0774654740974dffc Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 22 Nov 2024 14:45:46 -0800 Subject: [PATCH 33/49] bnxt_en: Unregister PTP during PCI shutdown and suspend If we go through the PCI shutdown or suspend path, we shutdown the NIC but PTP remains registered. If the kernel continues to run for a little bit, the periodic PTP .do_aux_work() function may be called and it will read the PHC from the BAR register. Since the device has already been disabled, it will cause a PCIe completion timeout. Fix it by calling bnxt_ptp_clear() in the PCI shutdown/suspend handlers. bnxt_ptp_clear() will unregister from PTP and .do_aux_work() will be canceled. In bnxt_resume(), we need to re-initialize PTP. Fixes: a521c8a01d26 ("bnxt_en: Move bnxt_ptp_init() from bnxt_open() back to bnxt_init_one()") Cc: Richard Cochran Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Signed-off-by: Michael Chan Signed-off-by: Paolo Abeni --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9c4b8ea22bf94..57e69c0552aba 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16245,6 +16245,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (netif_running(dev)) dev_close(dev); + bnxt_ptp_clear(bp); bnxt_clear_int_mode(bp); pci_disable_device(pdev); @@ -16272,6 +16273,7 @@ static int bnxt_suspend(struct device *device) rc = bnxt_close(dev); } bnxt_hwrm_func_drv_unrgtr(bp); + bnxt_ptp_clear(bp); pci_disable_device(bp->pdev); bnxt_free_ctx_mem(bp, false); rtnl_unlock(); @@ -16315,6 +16317,10 @@ static int bnxt_resume(struct device *device) if (bp->fw_crash_mem) bnxt_hwrm_crash_dump_mem_cfg(bp); + if (bnxt_ptp_init(bp)) { + kfree(bp->ptp_cfg); + bp->ptp_cfg = NULL; + } bnxt_get_wol_settings(bp); if (netif_running(dev)) { rc = bnxt_open(dev); From 0b882940665ca2849386ee459d4331aa2f8c4e7d Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 15 Nov 2024 10:45:31 -0500 Subject: [PATCH 34/49] Bluetooth: MGMT: Fix slab-use-after-free Read in set_powered_sync This fixes the following crash: ================================================================== BUG: KASAN: slab-use-after-free in set_powered_sync+0x3a/0xc0 net/bluetooth/mgmt.c:1353 Read of size 8 at addr ffff888029b4dd18 by task kworker/u9:0/54 CPU: 1 UID: 0 PID: 54 Comm: kworker/u9:0 Not tainted 6.11.0-rc6-syzkaller-01155-gf723224742fc #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Workqueue: hci0 hci_cmd_sync_work Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 q kasan_report+0x143/0x180 mm/kasan/report.c:601 set_powered_sync+0x3a/0xc0 net/bluetooth/mgmt.c:1353 hci_cmd_sync_work+0x22b/0x400 net/bluetooth/hci_sync.c:328 process_one_work kernel/workqueue.c:3231 [inline] process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 worker_thread+0x86d/0xd10 kernel/workqueue.c:3389 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Allocated by task 5247: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:370 [inline] __kasan_kmalloc+0x98/0xb0 mm/kasan/common.c:387 kasan_kmalloc include/linux/kasan.h:211 [inline] __kmalloc_cache_noprof+0x19c/0x2c0 mm/slub.c:4193 kmalloc_noprof include/linux/slab.h:681 [inline] kzalloc_noprof include/linux/slab.h:807 [inline] mgmt_pending_new+0x65/0x250 net/bluetooth/mgmt_util.c:269 mgmt_pending_add+0x36/0x120 net/bluetooth/mgmt_util.c:296 set_powered+0x3cd/0x5e0 net/bluetooth/mgmt.c:1394 hci_mgmt_cmd+0xc47/0x11d0 net/bluetooth/hci_sock.c:1712 hci_sock_sendmsg+0x7b8/0x11c0 net/bluetooth/hci_sock.c:1832 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 sock_write_iter+0x2dd/0x400 net/socket.c:1160 new_sync_write fs/read_write.c:497 [inline] vfs_write+0xa72/0xc90 fs/read_write.c:590 ksys_write+0x1a0/0x2c0 fs/read_write.c:643 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 5246: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3f/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x40/0x50 mm/kasan/generic.c:579 poison_slab_object+0xe0/0x150 mm/kasan/common.c:240 __kasan_slab_free+0x37/0x60 mm/kasan/common.c:256 kasan_slab_free include/linux/kasan.h:184 [inline] slab_free_hook mm/slub.c:2256 [inline] slab_free mm/slub.c:4477 [inline] kfree+0x149/0x360 mm/slub.c:4598 settings_rsp+0x2bc/0x390 net/bluetooth/mgmt.c:1443 mgmt_pending_foreach+0xd1/0x130 net/bluetooth/mgmt_util.c:259 __mgmt_power_off+0x112/0x420 net/bluetooth/mgmt.c:9455 hci_dev_close_sync+0x665/0x11a0 net/bluetooth/hci_sync.c:5191 hci_dev_do_close net/bluetooth/hci_core.c:483 [inline] hci_dev_close+0x112/0x210 net/bluetooth/hci_core.c:508 sock_do_ioctl+0x158/0x460 net/socket.c:1222 sock_ioctl+0x629/0x8e0 net/socket.c:1341 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:907 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:893 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83gv entry_SYSCALL_64_after_hwframe+0x77/0x7f Reported-by: syzbot+03d6270b6425df1605bf@syzkaller.appspotmail.com Tested-by: syzbot+03d6270b6425df1605bf@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=03d6270b6425df1605bf Fixes: 275f3f648702 ("Bluetooth: Fix not checking MGMT cmd pending queue") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1f6d083682b8e..e406eb8e43277 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1318,7 +1318,8 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err) struct mgmt_mode *cp; /* Make sure cmd still outstanding. */ - if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev)) + if (err == -ECANCELED || + cmd != pending_find(MGMT_OP_SET_POWERED, hdev)) return; cp = cmd->param; @@ -1351,7 +1352,13 @@ static void mgmt_set_powered_complete(struct hci_dev *hdev, void *data, int err) static int set_powered_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - struct mgmt_mode *cp = cmd->param; + struct mgmt_mode *cp; + + /* Make sure cmd still outstanding. */ + if (cmd != pending_find(MGMT_OP_SET_POWERED, hdev)) + return -ECANCELED; + + cp = cmd->param; BT_DBG("%s", hdev->name); From a66dfaf18fd61bb75ef8cee83db46b2aadf153d0 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 21 Nov 2024 11:09:22 -0500 Subject: [PATCH 35/49] Bluetooth: MGMT: Fix possible deadlocks This fixes possible deadlocks like the following caused by hci_cmd_sync_dequeue causing the destroy function to run: INFO: task kworker/u19:0:143 blocked for more than 120 seconds. Tainted: G W O 6.8.0-2024-03-19-intel-next-iLS-24ww14 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u19:0 state:D stack:0 pid:143 tgid:143 ppid:2 flags:0x00004000 Workqueue: hci0 hci_cmd_sync_work [bluetooth] Call Trace: __schedule+0x374/0xaf0 schedule+0x3c/0xf0 schedule_preempt_disabled+0x1c/0x30 __mutex_lock.constprop.0+0x3ef/0x7a0 __mutex_lock_slowpath+0x13/0x20 mutex_lock+0x3c/0x50 mgmt_set_connectable_complete+0xa4/0x150 [bluetooth] ? kfree+0x211/0x2a0 hci_cmd_sync_dequeue+0xae/0x130 [bluetooth] ? __pfx_cmd_complete_rsp+0x10/0x10 [bluetooth] cmd_complete_rsp+0x26/0x80 [bluetooth] mgmt_pending_foreach+0x4d/0x70 [bluetooth] __mgmt_power_off+0x8d/0x180 [bluetooth] ? _raw_spin_unlock_irq+0x23/0x40 hci_dev_close_sync+0x445/0x5b0 [bluetooth] hci_set_powered_sync+0x149/0x250 [bluetooth] set_powered_sync+0x24/0x60 [bluetooth] hci_cmd_sync_work+0x90/0x150 [bluetooth] process_one_work+0x13e/0x300 worker_thread+0x2f7/0x420 ? __pfx_worker_thread+0x10/0x10 kthread+0x107/0x140 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x3d/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Tested-by: Kiran K Fixes: f53e1c9c726d ("Bluetooth: MGMT: Fix possible crash on mgmt_index_removed") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index e406eb8e43277..b31192d473d09 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1518,7 +1518,8 @@ static void mgmt_set_discoverable_complete(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "err %d", err); /* Make sure cmd still outstanding. */ - if (cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev)) + if (err == -ECANCELED || + cmd != pending_find(MGMT_OP_SET_DISCOVERABLE, hdev)) return; hci_dev_lock(hdev); @@ -1692,7 +1693,8 @@ static void mgmt_set_connectable_complete(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "err %d", err); /* Make sure cmd still outstanding. */ - if (cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) + if (err == -ECANCELED || + cmd != pending_find(MGMT_OP_SET_CONNECTABLE, hdev)) return; hci_dev_lock(hdev); @@ -1924,7 +1926,7 @@ static void set_ssp_complete(struct hci_dev *hdev, void *data, int err) bool changed; /* Make sure cmd still outstanding. */ - if (cmd != pending_find(MGMT_OP_SET_SSP, hdev)) + if (err == -ECANCELED || cmd != pending_find(MGMT_OP_SET_SSP, hdev)) return; if (err) { @@ -3848,7 +3850,8 @@ static void set_name_complete(struct hci_dev *hdev, void *data, int err) bt_dev_dbg(hdev, "err %d", err); - if (cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev)) + if (err == -ECANCELED || + cmd != pending_find(MGMT_OP_SET_LOCAL_NAME, hdev)) return; if (status) { @@ -4023,7 +4026,8 @@ static void set_default_phy_complete(struct hci_dev *hdev, void *data, int err) struct sk_buff *skb = cmd->skb; u8 status = mgmt_status(err); - if (cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev)) + if (err == -ECANCELED || + cmd != pending_find(MGMT_OP_SET_PHY_CONFIGURATION, hdev)) return; if (!status) { @@ -5914,13 +5918,16 @@ static void start_discovery_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; + bt_dev_dbg(hdev, "err %d", err); + + if (err == -ECANCELED) + return; + if (cmd != pending_find(MGMT_OP_START_DISCOVERY, hdev) && cmd != pending_find(MGMT_OP_START_LIMITED_DISCOVERY, hdev) && cmd != pending_find(MGMT_OP_START_SERVICE_DISCOVERY, hdev)) return; - bt_dev_dbg(hdev, "err %d", err); - mgmt_cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(err), cmd->param, 1); mgmt_pending_remove(cmd); @@ -6153,7 +6160,8 @@ static void stop_discovery_complete(struct hci_dev *hdev, void *data, int err) { struct mgmt_pending_cmd *cmd = data; - if (cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev)) + if (err == -ECANCELED || + cmd != pending_find(MGMT_OP_STOP_DISCOVERY, hdev)) return; bt_dev_dbg(hdev, "err %d", err); @@ -8144,7 +8152,8 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, void *data, u8 status = mgmt_status(err); u16 eir_len; - if (cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev)) + if (err == -ECANCELED || + cmd != pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev)) return; if (!status) { From ed9588554943097bdf09588a8a105fbb058869c5 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Tue, 26 Nov 2024 07:58:43 +0800 Subject: [PATCH 36/49] Bluetooth: SCO: remove the redundant sco_conn_put When adding conn, it is necessary to increase and retain the conn reference count at the same time. Another problem was fixed along the way, conn_put is missing when hcon is NULL in the timeout routine. Fixes: e6720779ae61 ("Bluetooth: SCO: Use kref to track lifetime of sco_conn") Reported-and-tested-by: syzbot+489f78df4709ac2bfdd3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=489f78df4709ac2bfdd3 Signed-off-by: Edward Adam Davis Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/sco.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 1b8e468d24cf5..78f7bca244877 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -143,6 +143,7 @@ static void sco_sock_timeout(struct work_struct *work) sco_conn_lock(conn); if (!conn->hcon) { sco_conn_unlock(conn); + sco_conn_put(conn); return; } sk = sco_sock_hold(conn); @@ -192,7 +193,6 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) conn->hcon = hcon; sco_conn_unlock(conn); } - sco_conn_put(conn); return conn; } From 1465036b10be4b8b00eb31c879e86de633ad74c1 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 19 Nov 2024 14:31:41 +0100 Subject: [PATCH 37/49] llc: Improve setsockopt() handling of malformed user input copy_from_sockptr() is used incorrectly: return value is the number of bytes that could not be copied. Since it's deprecated, switch to copy_safe_from_sockptr(). Note: Keeping the `optlen != sizeof(int)` check as copy_safe_from_sockptr() by itself would also accept optlen > sizeof(int). Which would allow a more lenient handling of inputs. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: David Wei Signed-off-by: Michal Luczaj Signed-off-by: Paolo Abeni --- net/llc/af_llc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 4eb52add7103b..0259cde394ba0 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1098,7 +1098,7 @@ static int llc_ui_setsockopt(struct socket *sock, int level, int optname, lock_sock(sk); if (unlikely(level != SOL_LLC || optlen != sizeof(int))) goto out; - rc = copy_from_sockptr(&opt, optval, sizeof(opt)); + rc = copy_safe_from_sockptr(&opt, sizeof(opt), optval, optlen); if (rc) goto out; rc = -EINVAL; From 02020056647017e70509bb58c3096448117099e1 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 19 Nov 2024 14:31:42 +0100 Subject: [PATCH 38/49] rxrpc: Improve setsockopt() handling of malformed user input copy_from_sockptr() does not return negative value on error; instead, it reports the number of bytes that failed to copy. Since it's deprecated, switch to copy_safe_from_sockptr(). Note: Keeping the `optlen != sizeof(unsigned int)` check as copy_safe_from_sockptr() by itself would also accept optlen > sizeof(unsigned int). Which would allow a more lenient handling of inputs. Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") Signed-off-by: Michal Luczaj Signed-off-by: Paolo Abeni --- net/rxrpc/af_rxrpc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index f4844683e1203..9d8bd0b37e41d 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -707,9 +707,10 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname, ret = -EISCONN; if (rx->sk.sk_state != RXRPC_UNBOUND) goto error; - ret = copy_from_sockptr(&min_sec_level, optval, - sizeof(unsigned int)); - if (ret < 0) + ret = copy_safe_from_sockptr(&min_sec_level, + sizeof(min_sec_level), + optval, optlen); + if (ret) goto error; ret = -EINVAL; if (min_sec_level > RXRPC_SECURITY_MAX) From 49b2b973325add467270e906d4d794aa6679b38b Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Tue, 19 Nov 2024 14:31:43 +0100 Subject: [PATCH 39/49] net: Comment copy_from_sockptr() explaining its behaviour copy_from_sockptr() has a history of misuse. Add a comment explaining that the function follows API of copy_from_user(), i.e. returns 0 for success, or number of bytes not copied on error. Signed-off-by: Michal Luczaj Signed-off-by: Paolo Abeni --- include/linux/sockptr.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 195debe2b1dbc..3e6c8e9d67aef 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -53,6 +53,8 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src, /* Deprecated. * This is unsafe, unless caller checked user provided optlen. * Prefer copy_safe_from_sockptr() instead. + * + * Returns 0 for success, or number of bytes not copied on error. */ static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size) { From e2668c34b7e1a2288ea0a97ccf3cd12e2870ca18 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Sat, 23 Nov 2024 14:50:12 +0000 Subject: [PATCH 40/49] net: phy: fix phy_ethtool_set_eee() incorrectly enabling LPI When phy_ethtool_set_eee_noneg() detects a change in the LPI parameters, it attempts to update phylib state and trigger the link to cycle so the MAC sees the updated parameters. However, in doing so, it sets phydev->enable_tx_lpi depending on whether the EEE configuration allows the MAC to generate LPI without taking into account the result of negotiation. This can be demonstrated with a 1000base-T FD interface by: # ethtool --set-eee eno0 advertise 8 # cause EEE to be not negotiated # ethtool --set-eee eno0 tx-lpi off # ethtool --set-eee eno0 tx-lpi on This results in being true, despite EEE not having been negotiated and: # ethtool --show-eee eno0 EEE status: enabled - inactive Tx LPI: 250 (us) Supported EEE link modes: 100baseT/Full 1000baseT/Full Advertised EEE link modes: 100baseT/Full 1000baseT/Full Fix this by keeping track of whether EEE was negotiated via a new eee_active member in struct phy_device, and include this state in the decision whether phydev->enable_tx_lpi should be set. Fixes: 3e43b903da04 ("net: phy: Immediately call adjust_link if only tx_lpi_enabled changes") Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tErSe-005RhB-2R@rmk-PC.armlinux.org.uk Signed-off-by: Paolo Abeni --- drivers/net/phy/phy-c45.c | 2 +- drivers/net/phy/phy.c | 30 ++++++++++++++++++------------ include/linux/phy.h | 2 ++ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 96d0b3a5a9d3d..944ae98ad1106 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1530,7 +1530,7 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, return ret; data->eee_enabled = is_enabled; - data->eee_active = ret; + data->eee_active = phydev->eee_active; linkmode_copy(data->supported, phydev->supported_eee); return 0; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a660a80f34b79..0d20b534122b2 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -990,14 +990,14 @@ static int phy_check_link_status(struct phy_device *phydev) phydev->state = PHY_RUNNING; err = genphy_c45_eee_is_active(phydev, NULL, NULL, NULL); - if (err <= 0) - phydev->enable_tx_lpi = false; - else - phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled; + phydev->eee_active = err > 0; + phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled && + phydev->eee_active; phy_link_up(phydev); } else if (!phydev->link && phydev->state != PHY_NOLINK) { phydev->state = PHY_NOLINK; + phydev->eee_active = false; phydev->enable_tx_lpi = false; phy_link_down(phydev); } @@ -1685,15 +1685,21 @@ EXPORT_SYMBOL(phy_ethtool_get_eee); static void phy_ethtool_set_eee_noneg(struct phy_device *phydev, const struct eee_config *old_cfg) { - if (phydev->eee_cfg.tx_lpi_enabled != old_cfg->tx_lpi_enabled || + bool enable_tx_lpi; + + if (!phydev->link) + return; + + enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled && phydev->eee_active; + + if (phydev->enable_tx_lpi != enable_tx_lpi || phydev->eee_cfg.tx_lpi_timer != old_cfg->tx_lpi_timer) { - phydev->enable_tx_lpi = eeecfg_mac_can_tx_lpi(&phydev->eee_cfg); - if (phydev->link) { - phydev->link = false; - phy_link_down(phydev); - phydev->link = true; - phy_link_up(phydev); - } + phydev->enable_tx_lpi = false; + phydev->link = false; + phy_link_down(phydev); + phydev->enable_tx_lpi = enable_tx_lpi; + phydev->link = true; + phy_link_up(phydev); } } diff --git a/include/linux/phy.h b/include/linux/phy.h index 77c6d6451638a..563c462056857 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -602,6 +602,7 @@ struct macsec_ops; * @supported_eee: supported PHY EEE linkmodes * @advertising_eee: Currently advertised EEE linkmodes * @enable_tx_lpi: When True, MAC should transmit LPI to PHY + * @eee_active: phylib private state, indicating that EEE has been negotiated * @eee_cfg: User configuration of EEE * @lp_advertising: Current link partner advertised linkmodes * @host_interfaces: PHY interface modes supported by host @@ -723,6 +724,7 @@ struct phy_device { /* Energy efficient ethernet modes which should be prohibited */ __ETHTOOL_DECLARE_LINK_MODE_MASK(eee_broken_modes); bool enable_tx_lpi; + bool eee_active; struct eee_config eee_cfg; /* Host supported PHY interface types. Should be ignored if empty. */ From c31e72d021db2714df03df6c42855a1db592716c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 23 Nov 2024 09:42:36 -0800 Subject: [PATCH 41/49] tcp: Fix use-after-free of nreq in reqsk_timer_handler(). The cited commit replaced inet_csk_reqsk_queue_drop_and_put() with __inet_csk_reqsk_queue_drop() and reqsk_put() in reqsk_timer_handler(). Then, oreq should be passed to reqsk_put() instead of req; otherwise use-after-free of nreq could happen when reqsk is migrated but the retry attempt failed (e.g. due to timeout). Let's pass oreq to reqsk_put(). Fixes: e8c526f2bdf1 ("tcp/dccp: Don't use timer_pending() in reqsk_queue_unlink().") Reported-by: Liu Jian Closes: https://lore.kernel.org/netdev/1284490f-9525-42ee-b7b8-ccadf6606f6d@huawei.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Vadim Fedorenko Reviewed-by: Liu Jian Reviewed-by: Eric Dumazet Reviewed-by: Martin KaFai Lau Link: https://patch.msgid.link/20241123174236.62438-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni --- net/ipv4/inet_connection_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 491c2c6b683e2..6872b5aff73e5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1191,7 +1191,7 @@ static void reqsk_timer_handler(struct timer_list *t) drop: __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); - reqsk_put(req); + reqsk_put(oreq); } static bool reqsk_queue_hash_req(struct request_sock *req, From 122aba8c80618eca904490b1733af27fb8f07528 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 23 Nov 2024 18:21:48 -0800 Subject: [PATCH 42/49] net_sched: sch_fq: don't follow the fast path if Tx is behind now Recent kernels cause a lot of TCP retransmissions [ ID] Interval Transfer Bitrate Retr Cwnd [ 5] 0.00-1.00 sec 2.24 GBytes 19.2 Gbits/sec 2767 442 KBytes [ 5] 1.00-2.00 sec 2.23 GBytes 19.1 Gbits/sec 2312 350 KBytes ^^^^ Replacing the qdisc with pfifo makes retransmissions go away. It appears that a flow may have a delayed packet with a very near Tx time. Later, we may get busy processing Rx and the target Tx time will pass, but we won't service Tx since the CPU is busy with Rx. If Rx sees an ACK and we try to push more data for the delayed flow we may fastpath the skb, not realizing that there are already "ready to send" packets for this flow sitting in the qdisc. Don't trust the fastpath if we are "behind" according to the projected Tx time for next flow waiting in the Qdisc. Because we consider anything within the offload window to be okay for fastpath we must consider the entire offload window as "now". Qdisc config: qdisc fq 8001: dev eth0 parent 1234:1 limit 10000p flow_limit 100p \ buckets 32768 orphan_mask 1023 bands 3 \ priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 \ weights 589824 196608 65536 quantum 3028b initial_quantum 15140b \ low_rate_threshold 550Kbit \ refill_delay 40ms timer_slack 10us horizon 10s horizon_drop For iperf this change seems to do fine, the reordering is gone. The fastpath still gets used most of the time: gc 0 highprio 0 fastpath 142614 throttled 418309 latency 19.1us xx_behind 2731 where "xx_behind" counts how many times we hit the new "return false". CC: stable@vger.kernel.org Fixes: 076433bd78d7 ("net_sched: sch_fq: add fast path for mostly idle qdisc") Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241124022148.3126719-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- net/sched/sch_fq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a97638bef6da5..a5e87f9ea9861 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -332,6 +332,12 @@ static bool fq_fastpath_check(const struct Qdisc *sch, struct sk_buff *skb, */ if (q->internal.qlen >= 8) return false; + + /* Ordering invariants fall apart if some delayed flows + * are ready but we haven't serviced them, yet. + */ + if (q->time_next_delayed_flow <= now + q->offload_horizon) + return false; } sk = skb->sk; From 663a917475530feff868a4f2bda286ea4171f420 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 24 Nov 2024 07:32:43 +0000 Subject: [PATCH 43/49] selftests: rds: move test.py to TEST_FILES The test.py should not be run separately. It should be run via run.sh, which will do some sanity checks first. Move the test.py from TEST_PROGS to TEST_FILES. Reported-by: Maximilian Heyne Closes: https://lore.kernel.org/netdev/20241122150129.GB18887@dev-dsk-mheyne-1b-55676e6a.eu-west-1.amazon.com Fixes: 3ade6ce1255e ("selftests: rds: add testing infrastructure") Signed-off-by: Hangbin Liu Reviewed-by: Allison Henderson Link: https://patch.msgid.link/20241124073243.847932-1-liuhangbin@gmail.com Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/rds/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index 1803c39dbacb1..612a7219990eb 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -3,10 +3,9 @@ all: @echo mk_build_dir="$(shell pwd)" > include.sh -TEST_PROGS := run.sh \ - test.py +TEST_PROGS := run.sh -TEST_FILES := include.sh +TEST_FILES := include.sh test.py EXTRA_CLEAN := /tmp/rds_logs include.sh From 11b6e701bce96f98474084f26821157cb0dccf69 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 24 Nov 2024 16:40:56 +0100 Subject: [PATCH 44/49] ipmr: add debug check for mr table cleanup The multicast route tables lifecycle, for both ipv4 and ipv6, is protected by RCU using the RTNL lock for write access. In many places a table pointer escapes the RCU (or RTNL) protected critical section, but such scenarios are actually safe because tables are deleted only at namespace cleanup time or just after allocation, in case of default rule creation failure. Tables freed at namespace cleanup time are assured to be alive for the whole netns lifetime; tables freed just after creation time are never exposed to other possible users. Ensure that the free conditions are respected in ip{,6}mr_free_table, to document the locking schema and to prevent future possible introduction of 'table del' operation from breaking it. Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/ipv4/ipmr.c | 14 ++++++++++++++ net/ipv6/ip6mr.c | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c58dd78509a27..bac0776648e09 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -120,6 +120,11 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv4.mr_tables)) +static bool ipmr_can_free_table(struct net *net) +{ + return !check_net(net) || !net->ipv4.mr_rules_ops; +} + static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -302,6 +307,11 @@ EXPORT_SYMBOL(ipmr_rule_default); #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) +static bool ipmr_can_free_table(struct net *net) +{ + return !check_net(net); +} + static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -413,6 +423,10 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { + struct net *net = read_pnet(&mrt->net); + + DEBUG_NET_WARN_ON_ONCE(!ipmr_can_free_table(net)); + timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d66f58932a793..b80fca8949169 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -108,6 +108,11 @@ static void ipmr_expire_process(struct timer_list *t); lockdep_rtnl_is_held() || \ list_empty(&net->ipv6.mr6_tables)) +static bool ip6mr_can_free_table(struct net *net) +{ + return !check_net(net) || !net->ipv6.mr6_rules_ops; +} + static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -291,6 +296,11 @@ EXPORT_SYMBOL(ip6mr_rule_default); #define ip6mr_for_each_table(mrt, net) \ for (mrt = net->ipv6.mrt6; mrt; mrt = NULL) +static bool ip6mr_can_free_table(struct net *net) +{ + return !check_net(net); +} + static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) { @@ -392,6 +402,10 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr_table *mrt) { + struct net *net = read_pnet(&mrt->net); + + DEBUG_NET_WARN_ON_ONCE(!ip6mr_can_free_table(net)); + timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC | MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC); From f1553c9894b4dbeb10a2ab15ab1aa113b3b4047c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 24 Nov 2024 16:40:57 +0100 Subject: [PATCH 45/49] ip6mr: fix tables suspicious RCU usage Several places call ip6mr_get_table() with no RCU nor RTNL lock. Add RCU protection inside such helper and provide a lockless variant for the few callers that already acquired the relevant lock. Note that some users additionally reference the table outside the RCU lock. That is actually safe as the table deletion can happen only after all table accesses are completed. Fixes: e2d57766e674 ("net: Provide compat support for SIOCGETMIFCNT_IN6 and SIOCGETSGCNT_IN6.") Fixes: d7c31cbde4bc ("net: ip6mr: add RTM_GETROUTE netlink op") Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/ipv6/ip6mr.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b80fca8949169..4147890fe98ff 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -130,7 +130,7 @@ static struct mr_table *ip6mr_mr_table_iter(struct net *net, return ret; } -static struct mr_table *ip6mr_get_table(struct net *net, u32 id) +static struct mr_table *__ip6mr_get_table(struct net *net, u32 id) { struct mr_table *mrt; @@ -141,6 +141,16 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id) return NULL; } +static struct mr_table *ip6mr_get_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + + rcu_read_lock(); + mrt = __ip6mr_get_table(net, id); + rcu_read_unlock(); + return mrt; +} + static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr_table **mrt) { @@ -182,7 +192,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, arg->table = fib_rule_get_table(rule, arg); - mrt = ip6mr_get_table(rule->fr_net, arg->table); + mrt = __ip6mr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; @@ -314,6 +324,8 @@ static struct mr_table *ip6mr_get_table(struct net *net, u32 id) return net->ipv6.mrt6; } +#define __ip6mr_get_table ip6mr_get_table + static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr_table **mrt) { @@ -392,7 +404,7 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id) { struct mr_table *mrt; - mrt = ip6mr_get_table(net, id); + mrt = __ip6mr_get_table(net, id); if (mrt) return mrt; @@ -425,13 +437,15 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) struct net *net = seq_file_net(seq); struct mr_table *mrt; - mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (!mrt) + rcu_read_lock(); + mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); + if (!mrt) { + rcu_read_unlock(); return ERR_PTR(-ENOENT); + } iter->mrt = mrt; - rcu_read_lock(); return mr_vif_seq_start(seq, pos); } @@ -2292,11 +2306,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, struct mfc6_cache *cache; struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); - mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (!mrt) + rcu_read_lock(); + mrt = __ip6mr_get_table(net, RT6_TABLE_DFLT); + if (!mrt) { + rcu_read_unlock(); return -ENOENT; + } - rcu_read_lock(); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache && skb->dev) { int vif = ip6mr_find_vif(mrt, skb->dev); @@ -2576,7 +2592,7 @@ static int ip6mr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, grp = nla_get_in6_addr(tb[RTA_DST]); tableid = nla_get_u32_default(tb[RTA_TABLE], 0); - mrt = ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); + mrt = __ip6mr_get_table(net, tableid ?: RT_TABLE_DEFAULT); if (!mrt) { NL_SET_ERR_MSG_MOD(extack, "MR table does not exist"); return -ENOENT; @@ -2623,7 +2639,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (filter.table_id) { struct mr_table *mrt; - mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); + mrt = __ip6mr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) return skb->len; From fc9c273d6daaa9866f349bbe8cae25c67764c456 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Sun, 24 Nov 2024 16:40:58 +0100 Subject: [PATCH 46/49] ipmr: fix tables suspicious RCU usage Similar to the previous patch, plumb the RCU lock inside the ipmr_get_table(), provided a lockless variant and apply the latter in the few spots were the lock is already held. Fixes: 709b46e8d90b ("net: Add compat ioctl support for the ipv4 multicast ioctl SIOCGETSGCNT") Fixes: f0ad0860d01e ("ipv4: ipmr: support multiple tables") Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/ipv4/ipmr.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index bac0776648e09..383ea8b91cc78 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -142,7 +142,7 @@ static struct mr_table *ipmr_mr_table_iter(struct net *net, return ret; } -static struct mr_table *ipmr_get_table(struct net *net, u32 id) +static struct mr_table *__ipmr_get_table(struct net *net, u32 id) { struct mr_table *mrt; @@ -153,6 +153,16 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return NULL; } +static struct mr_table *ipmr_get_table(struct net *net, u32 id) +{ + struct mr_table *mrt; + + rcu_read_lock(); + mrt = __ipmr_get_table(net, id); + rcu_read_unlock(); + return mrt; +} + static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { @@ -194,7 +204,7 @@ static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp, arg->table = fib_rule_get_table(rule, arg); - mrt = ipmr_get_table(rule->fr_net, arg->table); + mrt = __ipmr_get_table(rule->fr_net, arg->table); if (!mrt) return -EAGAIN; res->mrt = mrt; @@ -325,6 +335,8 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) return net->ipv4.mrt; } +#define __ipmr_get_table ipmr_get_table + static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { @@ -413,7 +425,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) if (id != RT_TABLE_DEFAULT && id >= 1000000000) return ERR_PTR(-EINVAL); - mrt = ipmr_get_table(net, id); + mrt = __ipmr_get_table(net, id); if (mrt) return mrt; @@ -1388,7 +1400,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, goto out_unlock; } - mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); + mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT); if (!mrt) { ret = -ENOENT; goto out_unlock; @@ -2276,11 +2288,13 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, struct mr_table *mrt; int err; - mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (!mrt) + rcu_read_lock(); + mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT); + if (!mrt) { + rcu_read_unlock(); return -ENOENT; + } - rcu_read_lock(); cache = ipmr_cache_find(mrt, saddr, daddr); if (!cache && skb->dev) { int vif = ipmr_find_vif(mrt, skb->dev); @@ -2564,7 +2578,7 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, grp = nla_get_in_addr_default(tb[RTA_DST], 0); tableid = nla_get_u32_default(tb[RTA_TABLE], 0); - mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); + mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); if (!mrt) { err = -ENOENT; goto errout_free; @@ -2618,7 +2632,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (filter.table_id) { struct mr_table *mrt; - mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); + mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) return skb->len; @@ -2726,7 +2740,7 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, break; } } - mrt = ipmr_get_table(net, tblid); + mrt = __ipmr_get_table(net, tblid); if (!mrt) { ret = -ENOENT; goto out; @@ -2934,13 +2948,15 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) struct net *net = seq_file_net(seq); struct mr_table *mrt; - mrt = ipmr_get_table(net, RT_TABLE_DEFAULT); - if (!mrt) + rcu_read_lock(); + mrt = __ipmr_get_table(net, RT_TABLE_DEFAULT); + if (!mrt) { + rcu_read_unlock(); return ERR_PTR(-ENOENT); + } iter->mrt = mrt; - rcu_read_lock(); return mr_vif_seq_start(seq, pos); } From 6e33123a18bf6c69c941d09e091e8bfef5fb0d41 Mon Sep 17 00:00:00 2001 From: Vyshnav Ajith Date: Fri, 22 Nov 2024 03:48:52 +0530 Subject: [PATCH 47/49] Fix spelling mistake Changed from reequires to require. A minute typo. Signed-off-by: Vyshnav Ajith Link: https://patch.msgid.link/20241121221852.10754-1-puthen1977@gmail.com Signed-off-by: Paolo Abeni --- Documentation/networking/cdc_mbim.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/cdc_mbim.rst b/Documentation/networking/cdc_mbim.rst index 37f968acc473a..8404a3f794f37 100644 --- a/Documentation/networking/cdc_mbim.rst +++ b/Documentation/networking/cdc_mbim.rst @@ -51,7 +51,7 @@ Such userspace applications includes, but are not limited to: - mbimcli (included with the libmbim [3] library), and - ModemManager [4] -Establishing a MBIM IP session reequires at least these actions by the +Establishing a MBIM IP session requires at least these actions by the management application: - open the control channel From 04f5cb48995d51deed0af71aaba1b8699511313f Mon Sep 17 00:00:00 2001 From: Leo Stone Date: Sun, 24 Nov 2024 15:00:02 -0800 Subject: [PATCH 48/49] Documentation: tls_offload: fix typos and grammar Fix typos and grammar where it improves readability. Signed-off-by: Leo Stone Reviewed-by: Bagas Sanjaya Link: https://patch.msgid.link/20241124230002.56058-1-leocstone@gmail.com Signed-off-by: Paolo Abeni --- Documentation/networking/tls-offload.rst | 29 ++++++++++++------------ 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst index 5f0dea3d571e3..7354d48cdf926 100644 --- a/Documentation/networking/tls-offload.rst +++ b/Documentation/networking/tls-offload.rst @@ -51,7 +51,7 @@ and send them to the device for encryption and transmission. RX -- -On the receive side if the device handled decryption and authentication +On the receive side, if the device handled decryption and authentication successfully, the driver will set the decrypted bit in the associated :c:type:`struct sk_buff `. The packets reach the TCP stack and are handled normally. ``ktls`` is informed when data is queued to the socket @@ -120,8 +120,9 @@ before installing the connection state in the kernel. RX -- -In RX direction local networking stack has little control over the segmentation, -so the initial records' TCP sequence number may be anywhere inside the segment. +In the RX direction, the local networking stack has little control over +segmentation, so the initial records' TCP sequence number may be anywhere +inside the segment. Normal operation ================ @@ -138,8 +139,8 @@ There are no guarantees on record length or record segmentation. In particular segments may start at any point of a record and contain any number of records. Assuming segments are received in order, the device should be able to perform crypto operations and authentication regardless of segmentation. For this -to be possible device has to keep small amount of segment-to-segment state. -This includes at least: +to be possible, the device has to keep a small amount of segment-to-segment +state. This includes at least: * partial headers (if a segment carried only a part of the TLS header) * partial data block @@ -175,12 +176,12 @@ and packet transformation functions) the device validates the Layer 4 checksum and performs a 5-tuple lookup to find any TLS connection the packet may belong to (technically a 4-tuple lookup is sufficient - IP addresses and TCP port numbers, as the protocol -is always TCP). If connection is matched device confirms if the TCP sequence -number is the expected one and proceeds to TLS handling (record delineation, -decryption, authentication for each record in the packet). The device leaves -the record framing unmodified, the stack takes care of record decapsulation. -Device indicates successful handling of TLS offload in the per-packet context -(descriptor) passed to the host. +is always TCP). If the packet is matched to a connection, the device confirms +if the TCP sequence number is the expected one and proceeds to TLS handling +(record delineation, decryption, authentication for each record in the packet). +The device leaves the record framing unmodified, the stack takes care of record +decapsulation. Device indicates successful handling of TLS offload in the +per-packet context (descriptor) passed to the host. Upon reception of a TLS offloaded packet, the driver sets the :c:member:`decrypted` mark in :c:type:`struct sk_buff ` @@ -439,7 +440,7 @@ by the driver: * ``rx_tls_resync_req_end`` - number of times the TLS async resync request properly ended with providing the HW tracked tcp-seq. * ``rx_tls_resync_req_skip`` - number of times the TLS async resync request - procedure was started by not properly ended. + procedure was started but not properly ended. * ``rx_tls_resync_res_ok`` - number of times the TLS resync response call to the driver was successfully handled. * ``rx_tls_resync_res_skip`` - number of times the TLS resync response call to @@ -507,8 +508,8 @@ in packets as seen on the wire. Transport layer transparency ---------------------------- -The device should not modify any packet headers for the purpose -of the simplifying TLS offload. +For the purpose of simplifying TLS offload, the device should not modify any +packet headers. The device should not depend on any packet headers beyond what is strictly necessary for TLS offload. From f6d7695b5ae22092fa2cc42529bb7462f7e0c4ad Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Nov 2024 17:18:04 +0100 Subject: [PATCH 49/49] ipmr: fix build with clang and DEBUG_NET disabled. Sasha reported a build issue in ipmr:: net/ipv4/ipmr.c:320:13: error: function 'ipmr_can_free_table' is not \ needed and will not be emitted \ [-Werror,-Wunneeded-internal-declaration] 320 | static bool ipmr_can_free_table(struct net *net) Apparently clang is too smart with BUILD_BUG_ON_INVALID(), let's fallback to a plain WARN_ON_ONCE(). Reported-by: Sasha Levin Closes: https://qa-reports.linaro.org/lkft/sashal-linus-next/build/v6.11-25635-g6813e2326f1e/testrun/26111580/suite/build/test/clang-nightly-lkftconfig/details/ Fixes: 11b6e701bce9 ("ipmr: add debug check for mr table cleanup") Signed-off-by: Paolo Abeni Link: https://patch.msgid.link/ee75faa926b2446b8302ee5fc30e129d2df73b90.1732810228.git.pabeni@redhat.com Signed-off-by: Paolo Abeni --- net/ipv4/ipmr.c | 2 +- net/ipv6/ip6mr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 383ea8b91cc78..c5b8ec5c0a8c0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -437,7 +437,7 @@ static void ipmr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - DEBUG_NET_WARN_ON_ONCE(!ipmr_can_free_table(net)); + WARN_ON_ONCE(!ipmr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4147890fe98ff..7f1902ac35862 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -416,7 +416,7 @@ static void ip6mr_free_table(struct mr_table *mrt) { struct net *net = read_pnet(&mrt->net); - DEBUG_NET_WARN_ON_ONCE(!ip6mr_can_free_table(net)); + WARN_ON_ONCE(!ip6mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |