From 76fe372ccb81b0c89b6cd2fec26e2f38c958be85 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 22 Jul 2024 12:28:42 -0700 Subject: [PATCH 01/13] can: bcm: Remove proc entry when dev is unregistered. syzkaller reported a warning in bcm_connect() below. [0] The repro calls connect() to vxcan1, removes vxcan1, and calls connect() with ifindex == 0. Calling connect() for a BCM socket allocates a proc entry. Then, bcm_sk(sk)->bound is set to 1 to prevent further connect(). However, removing the bound device resets bcm_sk(sk)->bound to 0 in bcm_notify(). The 2nd connect() tries to allocate a proc entry with the same name and sets NULL to bcm_sk(sk)->bcm_proc_read, leaking the original proc entry. Since the proc entry is available only for connect()ed sockets, let's clean up the entry when the bound netdev is unregistered. [0]: proc_dir_entry 'can-bcm/2456' already registered WARNING: CPU: 1 PID: 394 at fs/proc/generic.c:376 proc_register+0x645/0x8f0 fs/proc/generic.c:375 Modules linked in: CPU: 1 PID: 394 Comm: syz-executor403 Not tainted 6.10.0-rc7-g852e42cc2dd4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 RIP: 0010:proc_register+0x645/0x8f0 fs/proc/generic.c:375 Code: 00 00 00 00 00 48 85 ed 0f 85 97 02 00 00 4d 85 f6 0f 85 9f 02 00 00 48 c7 c7 9b cb cf 87 48 89 de 4c 89 fa e8 1c 6f eb fe 90 <0f> 0b 90 90 48 c7 c7 98 37 99 89 e8 cb 7e 22 05 bb 00 00 00 10 48 RSP: 0018:ffa0000000cd7c30 EFLAGS: 00010246 RAX: 9e129be1950f0200 RBX: ff1100011b51582c RCX: ff1100011857cd80 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000002 RBP: 0000000000000000 R08: ffd400000000000f R09: ff1100013e78cac0 R10: ffac800000cd7980 R11: ff1100013e12b1f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ff1100011a99a2ec FS: 00007fbd7086f740(0000) GS:ff1100013fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200071c0 CR3: 
0000000118556004 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: proc_create_net_single+0x144/0x210 fs/proc/proc_net.c:220 bcm_connect+0x472/0x840 net/can/bcm.c:1673 __sys_connect_file net/socket.c:2049 [inline] __sys_connect+0x5d2/0x690 net/socket.c:2066 __do_sys_connect net/socket.c:2076 [inline] __se_sys_connect net/socket.c:2073 [inline] __x64_sys_connect+0x8f/0x100 net/socket.c:2073 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd9/0x1c0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7fbd708b0e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007fff8cd33f08 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fbd708b0e5d RDX: 0000000000000010 RSI: 0000000020000040 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000040 R09: 0000000000000040 R10: 0000000000000040 R11: 0000000000000246 R12: 00007fff8cd34098 R13: 0000000000401280 R14: 0000000000406de8 R15: 00007fbd70ab9000 remove_proc_entry: removing non-empty directory 'net/can-bcm', leaking at least '2456' Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/all/20240722192842.37421-1-kuniyu@amazon.com Signed-off-by: Marc Kleine-Budde --- net/can/bcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/can/bcm.c b/net/can/bcm.c index 27d5fcf0eac9d..46d3ec3aa44b4 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1470,6 +1470,10 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == 
dev->ifindex) { +#if IS_ENABLED(CONFIG_PROC_FS) + if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) + remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir); +#endif bo->bound = 0; bo->ifindex = 0; notify_enodev = 1; From 06d4ef3056a7ac31be331281bb7a6302ef5a7f8a Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 5 Aug 2024 15:01:58 +0100 Subject: [PATCH 02/13] can: m_can: Release irq on error in m_can_open It appears that the irq requested in m_can_open() may be leaked if an error subsequently occurs: if m_can_start() fails. Address this by calling free_irq in the unwind path for such cases. Flagged by Smatch. Compile tested only. Fixes: eaacfeaca7ad ("can: m_can: Call the RAM init directly from m_can_chip_config") Acked-by: Marc Kleine-Budde Signed-off-by: Simon Horman Link: https://lore.kernel.org/all/20240805-mcan-irq-v2-1-7154c0484819@kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 7f63f866083e0..cd83c8b5d4b15 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2052,7 +2052,7 @@ static int m_can_open(struct net_device *dev) /* start the m_can controller */ err = m_can_start(dev); if (err) - goto exit_irq_fail; + goto exit_start_fail; if (!cdev->is_peripheral) napi_enable(&cdev->napi); @@ -2061,6 +2061,9 @@ static int m_can_open(struct net_device *dev) return 0; +exit_start_fail: + if (cdev->is_peripheral || dev->irq) + free_irq(dev->irq, dev); exit_irq_fail: if (cdev->is_peripheral) destroy_workqueue(cdev->tx_wq); From a651261ac74298535f6d6316ebe27beceb6b17b1 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:41 +0200 Subject: [PATCH 03/13] can: m_can: Reset coalescing during suspend/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During resume the interrupts are 
limited to IR_RF0N and the chip keeps running. In this case if coalescing is enabled and active we may miss waterlevel interrupts during suspend. It is safer to reset the coalescing by stopping the timer and adding IR_RF0N | IR_TEFN to the interrupts. This is a theoretical issue and probably extremely rare. Cc: Martin Hundebøll Fixes: 4a94d7e31cf5 ("can: m_can: allow keeping the transceiver running in suspend") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-2-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index cd83c8b5d4b15..31991e2f343e9 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2430,12 +2430,15 @@ int m_can_class_suspend(struct device *dev) netif_device_detach(ndev); /* leave the chip running with rx interrupt enabled if it is - * used as a wake-up source. + * used as a wake-up source. Coalescing needs to be reset then, + * the timer is cancelled here, interrupts are done in resume. */ - if (cdev->pm_wake_source) + if (cdev->pm_wake_source) { + hrtimer_cancel(&cdev->hrtimer); m_can_write(cdev, M_CAN_IE, IR_RF0N); - else + } else { m_can_stop(ndev); + } m_can_clk_stop(cdev); } @@ -2465,6 +2468,13 @@ int m_can_class_resume(struct device *dev) return ret; if (cdev->pm_wake_source) { + /* Restore active interrupts but disable coalescing as + * we may have missed important waterlevel interrupts + * between suspend and resume. Timers are already + * stopped in suspend. Here we enable all interrupts + * again. 
+ */ + cdev->active_interrupts |= IR_RF0N | IR_TEFN; m_can_write(cdev, M_CAN_IE, cdev->active_interrupts); } else { ret = m_can_start(ndev); From 6eff1cead75ff330bb33264424c1da6cc7179ab8 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:42 +0200 Subject: [PATCH 04/13] can: m_can: Remove coalesing disable in isr during suspend We don't need to disable coalescing when the interrupt handler executes while the chip is suspended. The coalescing is already reset during suspend. Fixes: 07f25091ca02 ("can: m_can: Implement receive coalescing") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-3-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 31991e2f343e9..ba416c973e8d2 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1223,10 +1223,8 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) struct m_can_classdev *cdev = netdev_priv(dev); u32 ir; - if (pm_runtime_suspended(cdev->dev)) { - m_can_coalescing_disable(cdev); + if (pm_runtime_suspended(cdev->dev)) return IRQ_NONE; - } ir = m_can_read(cdev, M_CAN_IR); m_can_coalescing_update(cdev, ir); From 40e4552eeef0e3090a5988de15889795936fd38f Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:43 +0200 Subject: [PATCH 05/13] can: m_can: Remove m_can_rx_peripheral indirection m_can_rx_peripheral() is a wrapper around m_can_rx_handler() that calls m_can_disable_all_interrupts() on error. The same handling for the same error path is done in m_can_isr() as well. So remove m_can_rx_peripheral() and do the call from m_can_isr() directly. 
Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-4-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index ba416c973e8d2..a37ed376de9b3 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1037,22 +1037,6 @@ static int m_can_rx_handler(struct net_device *dev, int quota, u32 irqstatus) return work_done; } -static int m_can_rx_peripheral(struct net_device *dev, u32 irqstatus) -{ - struct m_can_classdev *cdev = netdev_priv(dev); - int work_done; - - work_done = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, irqstatus); - - /* Don't re-enable interrupts if the driver had a fatal error - * (e.g., FIFO read failure). - */ - if (work_done < 0) - m_can_disable_all_interrupts(cdev); - - return work_done; -} - static int m_can_poll(struct napi_struct *napi, int quota) { struct net_device *dev = napi->dev; @@ -1250,7 +1234,7 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) } else { int pkts; - pkts = m_can_rx_peripheral(dev, ir); + pkts = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, ir); if (pkts < 0) goto out_fail; } From 4d5159bfafa8d1a205d8213b7434e0402588b9ed Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:44 +0200 Subject: [PATCH 06/13] can: m_can: Do not cancel timer from within timer On setups without interrupts, the interrupt handler is called from a timer callback. For non-peripheral receives napi is scheduled, interrupts are disabled and the timer is canceled with a blocking call. In case of an error this can happen as well. Check if napi is scheduled in the timer callback after the interrupt handler executed. If napi is scheduled, the timer is disabled. It will be reenabled by m_can_poll(). 
Return error values from the interrupt handler so that interrupt threads and timer callback can deal differently with it. In case of the timer we only disable the timer. The rest will be done when stopping the interface. Fixes: b382380c0d2d ("can: m_can: Add hrtimer to generate software interrupt") Fixes: a163c5761019 ("can: m_can: Start/Cancel polling timer together with interrupts") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-5-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 57 ++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index a37ed376de9b3..5228304779f19 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -487,7 +487,7 @@ static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev) if (!cdev->net->irq) { dev_dbg(cdev->dev, "Stop hrtimer\n"); - hrtimer_cancel(&cdev->hrtimer); + hrtimer_try_to_cancel(&cdev->hrtimer); } } @@ -1201,11 +1201,15 @@ static void m_can_coalescing_update(struct m_can_classdev *cdev, u32 ir) HRTIMER_MODE_REL); } -static irqreturn_t m_can_isr(int irq, void *dev_id) +/* This interrupt handler is called either from the interrupt thread or a + * hrtimer. This has implications like cancelling a timer won't be possible + * blocking. 
+ */ +static int m_can_interrupt_handler(struct m_can_classdev *cdev) { - struct net_device *dev = (struct net_device *)dev_id; - struct m_can_classdev *cdev = netdev_priv(dev); + struct net_device *dev = cdev->net; u32 ir; + int ret; if (pm_runtime_suspended(cdev->dev)) return IRQ_NONE; @@ -1232,11 +1236,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) m_can_disable_all_interrupts(cdev); napi_schedule(&cdev->napi); } else { - int pkts; - - pkts = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, ir); - if (pkts < 0) - goto out_fail; + ret = m_can_rx_handler(dev, NAPI_POLL_WEIGHT, ir); + if (ret < 0) + return ret; } } @@ -1254,8 +1256,9 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) } else { if (ir & (IR_TEFN | IR_TEFW)) { /* New TX FIFO Element arrived */ - if (m_can_echo_tx_event(dev) != 0) - goto out_fail; + ret = m_can_echo_tx_event(dev); + if (ret != 0) + return ret; } } @@ -1263,16 +1266,31 @@ static irqreturn_t m_can_isr(int irq, void *dev_id) can_rx_offload_threaded_irq_finish(&cdev->offload); return IRQ_HANDLED; +} -out_fail: - m_can_disable_all_interrupts(cdev); - return IRQ_HANDLED; +static irqreturn_t m_can_isr(int irq, void *dev_id) +{ + struct net_device *dev = (struct net_device *)dev_id; + struct m_can_classdev *cdev = netdev_priv(dev); + int ret; + + ret = m_can_interrupt_handler(cdev); + if (ret < 0) { + m_can_disable_all_interrupts(cdev); + return IRQ_HANDLED; + } + + return ret; } static enum hrtimer_restart m_can_coalescing_timer(struct hrtimer *timer) { struct m_can_classdev *cdev = container_of(timer, struct m_can_classdev, hrtimer); + if (cdev->can.state == CAN_STATE_BUS_OFF || + cdev->can.state == CAN_STATE_STOPPED) + return HRTIMER_NORESTART; + irq_wake_thread(cdev->net->irq, cdev->net); return HRTIMER_NORESTART; @@ -1973,8 +1991,17 @@ static enum hrtimer_restart hrtimer_callback(struct hrtimer *timer) { struct m_can_classdev *cdev = container_of(timer, struct m_can_classdev, hrtimer); + int ret; + + if (cdev->can.state == 
CAN_STATE_BUS_OFF || + cdev->can.state == CAN_STATE_STOPPED) + return HRTIMER_NORESTART; + + ret = m_can_interrupt_handler(cdev); - m_can_isr(0, cdev->net); + /* On error or if napi is scheduled to read, stop the timer */ + if (ret < 0 || napi_is_scheduled(&cdev->napi)) + return HRTIMER_NORESTART; hrtimer_forward_now(timer, ms_to_ktime(HRTIMER_POLL_INTERVAL_MS)); From a572fea86c9b06cd3e6e89d79d565b52cb7e7cff Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:45 +0200 Subject: [PATCH 07/13] can: m_can: disable_all_interrupts, not clear active_interrupts active_interrupts is a cache for the enabled interrupts and not the global masking of interrupts. Do not clear this variable, otherwise we may lose the state of the interrupts. Fixes: 07f25091ca02 ("can: m_can: Implement receive coalescing") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-6-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 5228304779f19..68bd4a00eccaa 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -483,7 +483,6 @@ static inline void m_can_disable_all_interrupts(struct m_can_classdev *cdev) { m_can_coalescing_disable(cdev); m_can_write(cdev, M_CAN_ILE, 0x0); - cdev->active_interrupts = 0x0; if (!cdev->net->irq) { dev_dbg(cdev->dev, "Stop hrtimer\n"); From 733dbf556cd5b71d5e6f6aa7a93f117b438ab785 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:46 +0200 Subject: [PATCH 08/13] can: m_can: Reset cached active_interrupts on start To force writing the enabled interrupts, reset the active_interrupts cache. 
Fixes: 07f25091ca02 ("can: m_can: Implement receive coalescing") Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-7-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 68bd4a00eccaa..67c4c740c416f 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1541,6 +1541,7 @@ static int m_can_chip_config(struct net_device *dev) else interrupts &= ~(IR_ERR_LEC_31X); } + cdev->active_interrupts = 0; m_can_interrupt_enable(cdev, interrupts); /* route all interrupts to INT0 */ From e443d15b949952ee039b731d5c35bcbafa300024 Mon Sep 17 00:00:00 2001 From: Markus Schneider-Pargmann Date: Mon, 5 Aug 2024 20:30:47 +0200 Subject: [PATCH 09/13] can: m_can: Limit coalescing to peripheral instances The use of coalescing for non-peripheral chips in the current implementation is limited to non-existing. Disable the possibility to set coalescing through ethtool. 
Signed-off-by: Markus Schneider-Pargmann Link: https://lore.kernel.org/all/20240805183047.305630-8-msp@baylibre.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 67c4c740c416f..012c3d22b01dd 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2184,7 +2184,7 @@ static int m_can_set_coalesce(struct net_device *dev, return 0; } -static const struct ethtool_ops m_can_ethtool_ops = { +static const struct ethtool_ops m_can_ethtool_ops_coalescing = { .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS_IRQ | ETHTOOL_COALESCE_RX_MAX_FRAMES_IRQ | ETHTOOL_COALESCE_TX_USECS_IRQ | @@ -2195,18 +2195,20 @@ static const struct ethtool_ops m_can_ethtool_ops = { .set_coalesce = m_can_set_coalesce, }; -static const struct ethtool_ops m_can_ethtool_ops_polling = { +static const struct ethtool_ops m_can_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; -static int register_m_can_dev(struct net_device *dev) +static int register_m_can_dev(struct m_can_classdev *cdev) { + struct net_device *dev = cdev->net; + dev->flags |= IFF_ECHO; /* we support local echo */ dev->netdev_ops = &m_can_netdev_ops; - if (dev->irq) - dev->ethtool_ops = &m_can_ethtool_ops; + if (dev->irq && cdev->is_peripheral) + dev->ethtool_ops = &m_can_ethtool_ops_coalescing; else - dev->ethtool_ops = &m_can_ethtool_ops_polling; + dev->ethtool_ops = &m_can_ethtool_ops; return register_candev(dev); } @@ -2392,7 +2394,7 @@ int m_can_class_register(struct m_can_classdev *cdev) if (ret) goto rx_offload_del; - ret = register_m_can_dev(cdev->net); + ret = register_m_can_dev(cdev); if (ret) { dev_err(cdev->dev, "registering %s failed (err=%d)\n", cdev->net->name, ret); From 50ea5449c56310d2d31c28ba91a59232116d3c1e Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 5 Jul 2024 17:28:27 +0200 Subject: [PATCH 
10/13] can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode If the ring (rx, tx) and/or coalescing parameters (rx-frames-irq, tx-frames-irq) have been configured while the interface was in CAN-CC mode, but the interface is brought up in CAN-FD mode, the ring parameters might be too big. Use the default CAN-FD values in this case. Fixes: 9263c2e92be9 ("can: mcp251xfd: ring: add support for runtime configurable RX/TX ring parameters") Link: https://lore.kernel.org/all/20240805-mcp251xfd-fix-ringconfig-v1-1-72086f0ca5ee@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c | 11 +++++++++- .../net/can/spi/mcp251xfd/mcp251xfd-ring.c | 20 ++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c index 9e8e82cdba461..61b0d6fa52dd8 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c @@ -97,7 +97,16 @@ void can_ram_get_layout(struct can_ram_layout *layout, if (ring) { u8 num_rx_coalesce = 0, num_tx_coalesce = 0; - num_rx = can_ram_rounddown_pow_of_two(config, &config->rx, 0, ring->rx_pending); + /* If the ring parameters have been configured in + * CAN-CC mode, but and we are in CAN-FD mode now, + * they might be to big. Use the default CAN-FD values + * in this case. + */ + num_rx = ring->rx_pending; + if (num_rx > layout->max_rx) + num_rx = layout->default_rx; + + num_rx = can_ram_rounddown_pow_of_two(config, &config->rx, 0, num_rx); /* The ethtool doc says: * To disable coalescing, set usecs = 0 and max_frames = 1. 
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index 7bd2bcb5cf876..f72582d4d3e8e 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -469,11 +469,25 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv) /* switching from CAN-2.0 to CAN-FD mode or vice versa */ if (fd_mode != test_bit(MCP251XFD_FLAGS_FD_MODE, priv->flags)) { + const struct ethtool_ringparam ring = { + .rx_pending = priv->rx_obj_num, + .tx_pending = priv->tx->obj_num, + }; + const struct ethtool_coalesce ec = { + .rx_coalesce_usecs_irq = priv->rx_coalesce_usecs_irq, + .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq, + .tx_coalesce_usecs_irq = priv->tx_coalesce_usecs_irq, + .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq, + }; struct can_ram_layout layout; - can_ram_get_layout(&layout, &mcp251xfd_ram_config, NULL, NULL, fd_mode); - priv->rx_obj_num = layout.default_rx; - tx_ring->obj_num = layout.default_tx; + can_ram_get_layout(&layout, &mcp251xfd_ram_config, &ring, &ec, fd_mode); + + priv->rx_obj_num = layout.cur_rx; + priv->rx_obj_num_coalesce_irq = layout.rx_coalesce; + + tx_ring->obj_num = layout.cur_tx; + priv->tx_obj_num_coalesce_irq = layout.tx_coalesce; } if (fd_mode) { From ac2b81eb8b2d104033560daea886ee84531e3d0a Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 5 Jul 2024 17:24:42 +0200 Subject: [PATCH 11/13] can: mcp251xfd: mcp251xfd_ring_init(): check TX-coalescing configuration When changing the interface from CAN-CC to CAN-FD mode the old coalescing parameters are re-used. This might cause problems, as the configured parameters are too big for CAN-FD mode. During testing an invalid TX coalescing configuration has been seen. The problem should have been fixed in the previous patch, but add a safeguard here to ensure that the number of TEF coalescing buffers (if configured) is exactly half of all TEF buffers. 
Link: https://lore.kernel.org/all/20240805-mcp251xfd-fix-ringconfig-v1-2-72086f0ca5ee@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index f72582d4d3e8e..83c18035b2a24 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -290,7 +290,7 @@ int mcp251xfd_ring_init(struct mcp251xfd_priv *priv) const struct mcp251xfd_rx_ring *rx_ring; u16 base = 0, ram_used; u8 fifo_nr = 1; - int i; + int err = 0, i; netdev_reset_queue(priv->ndev); @@ -386,10 +386,18 @@ int mcp251xfd_ring_init(struct mcp251xfd_priv *priv) netdev_err(priv->ndev, "Error during ring configuration, using more RAM (%u bytes) than available (%u bytes).\n", ram_used, MCP251XFD_RAM_SIZE); - return -ENOMEM; + err = -ENOMEM; } - return 0; + if (priv->tx_obj_num_coalesce_irq && + priv->tx_obj_num_coalesce_irq * 2 != priv->tx->obj_num) { + netdev_err(priv->ndev, + "Error during ring configuration, number of TEF coalescing buffers (%u) must be half of TEF buffers (%u).\n", + priv->tx_obj_num_coalesce_irq, priv->tx->obj_num); + err = -EINVAL; + } + + return err; } void mcp251xfd_ring_free(struct mcp251xfd_priv *priv) From 7dd9c26bd6cf679bcfdef01a8659791aa6487a29 Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Thu, 22 Aug 2024 08:25:07 +0100 Subject: [PATCH 12/13] can: mcp251x: fix deadlock if an interrupt occurs during mcp251x_open The mcp251x_hw_wake() function is called with the mcp_lock mutex held and disables the interrupt handler so that no interrupts can be processed while waking the device. If an interrupt has already occurred then waiting for the interrupt handler to complete will deadlock because it will be trying to acquire the same mutex. 
CPU0 CPU1 ---- ---- mcp251x_open() mutex_lock(&priv->mcp_lock) request_threaded_irq() mcp251x_can_ist() mutex_lock(&priv->mcp_lock) mcp251x_hw_wake() disable_irq() <-- deadlock Use disable_irq_nosync() instead because the interrupt handler does everything while holding the mutex so it doesn't matter if it's still running. Fixes: 8ce8c0abcba3 ("can: mcp251x: only reset hardware as required") Signed-off-by: Simon Arlott Reviewed-by: Przemek Kitszel Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/4fc08687-1d80-43fe-9f0d-8ef8475e75f6@0882a8b5-c6c3-11e9-b005-00805fc181fe.uuid.home.arpa Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 3b8736ff0345c..ec5c64006a16f 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -752,7 +752,7 @@ static int mcp251x_hw_wake(struct spi_device *spi) int ret; /* Force wakeup interrupt to wake device, but don't execute IST */ - disable_irq(spi->irq); + disable_irq_nosync(spi->irq); mcp251x_write_2regs(spi, CANINTE, CANINTE_WAKIE, CANINTF_WAKIF); /* Wait for oscillator startup timer after wake up */ From dd885d90c047dbdd2773c1d33954cbd8747d81e2 Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Fri, 30 Aug 2024 17:31:13 +0200 Subject: [PATCH 13/13] can: kvaser_pciefd: Use a single write when releasing RX buffers Kvaser's PCIe cards use the KCAN FPGA IP block which has dual 4K buffers for incoming messages shared by all (currently up to eight) channels. While the driver processes messages in one buffer, new incoming messages are stored in the other and so on. The design of KCAN is such that a buffer must be fully read and then released. Releasing a buffer will make the FPGA switch buffers. 
If the other buffer contains at least one incoming message the FPGA will also instantly issue a new interrupt, if not the interrupt will be issued after receiving the first new message. With IRQx interrupts, it takes a little time for the interrupt to happen, enough for any previous ISR call to do its business and return, but MSI interrupts are way faster so this time is reduced to almost nothing. So with MSI, releasing the buffer HAS to be the very last action of the ISR before returning, otherwise the new interrupt might be "masked" by the kernel because the previous ISR call hasn't returned. And the interrupts are edge-triggered so we cannot lose one, or the ping-pong reading process will stop. This is why this patch modifies the driver to use a single write to the SRB_CMD register before returning. Signed-off-by: Martin Jocic Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20240830153113.2081440-1-martin.jocic@kvaser.com Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/kvaser_pciefd.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index a60d9efd5f8d1..9ffc3ffb4e8f8 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1686,6 +1686,7 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); u32 srb_irq = 0; + u32 srb_release = 0; int i; if (!(pci_irq & irq_mask->all)) @@ -1699,17 +1700,14 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) kvaser_pciefd_transmit_irq(pcie->can[i]); } - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { - /* Reset DMA buffer 0, may trigger new interrupt */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, - KVASER_PCIEFD_SRB_ADDR(pcie) + 
KVASER_PCIEFD_SRB_CMD_REG); - } + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) + srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0; - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { - /* Reset DMA buffer 1, may trigger new interrupt */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, - KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); - } + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) + srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1; + + if (srb_release) + iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); return IRQ_HANDLED; }