From 23722fb46725da42b80bc55a91a9bac69e35188a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 21 Jul 2022 14:03:29 +0100 Subject: [PATCH 1/2] coresight: Fix possible deadlock with lock dependency With lockdeps enabled, we get the following warning: ====================================================== WARNING: possible circular locking dependency detected ------------------------------------------------------ kworker/u12:1/53 is trying to acquire lock: ffff80000adce220 (coresight_mutex){+.+.}-{4:4}, at: coresight_set_assoc_ectdev_mutex+0x3c/0x5c but task is already holding lock: ffff80000add1f60 (ect_mutex){+.+.}-{4:4}, at: cti_probe+0x318/0x394 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (ect_mutex){+.+.}-{4:4}: __mutex_lock_common+0xd8/0xe60 mutex_lock_nested+0x44/0x50 cti_add_assoc_to_csdev+0x4c/0x184 coresight_register+0x2f0/0x314 tmc_probe+0x33c/0x414 -> #0 (coresight_mutex){+.+.}-{4:4}: __lock_acquire+0x1a20/0x32d0 lock_acquire+0x160/0x308 __mutex_lock_common+0xd8/0xe60 mutex_lock_nested+0x44/0x50 coresight_set_assoc_ectdev_mutex+0x3c/0x5c cti_update_conn_xrefs+0x6c/0xf8 cti_probe+0x33c/0x394 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(ect_mutex); lock(coresight_mutex); lock(ect_mutex); lock(coresight_mutex); *** DEADLOCK *** 4 locks held by kworker/u12:1/53: #0: ((wq_completion)events_unbound){+.+.}-{0:0}, at: process_one_work+0x1fc/0x63c #1: (deferred_probe_work){+.+.}-{0:0}, at: process_one_work+0x228/0x63c #2: (&dev->mutex){....}-{4:4}, at: __device_attach+0x48/0x1a8 #3: (ect_mutex){+.+.}-{4:4}, at: cti_probe+0x318/0x394 To fix the same, call cti_add_assoc_to_csdev without the holding coresight_mutex and confine the locking while setting the associated ect / cti device using coresight_set_assoc_ectdev_mutex(). Fixes: 177af8285b59 ("coresight: cti: Enable CTI associated with devices") Cc: Mathieu Poirier Cc: Suzuki K Poulose Cc: Mike Leach Cc: Leo Yan Signed-off-by: Sudeep Holla Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20220721130329.3787211-1-sudeep.holla@arm.com --- drivers/hwtracing/coresight/coresight-core.c | 7 ++++--- drivers/hwtracing/coresight/coresight-cti-core.c | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index d5dbc67bacb44..f3068175ca9d9 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1687,14 +1687,15 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) ret = coresight_fixup_device_conns(csdev); if (!ret) ret = coresight_fixup_orphan_conns(csdev); - if (!ret && cti_assoc_ops && cti_assoc_ops->add) - cti_assoc_ops->add(csdev); out_unlock: mutex_unlock(&coresight_mutex); /* Success */ - if (!ret) + if (!ret) { + if (cti_assoc_ops && cti_assoc_ops->add) + cti_assoc_ops->add(csdev); return csdev; + } /* Unregister the device if needed */ if (registered) { diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index 8988b2ed2ea6f..1be92342b5b98 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -541,7 +541,7 @@ cti_match_fixup_csdev(struct cti_device *ctidev, const char *node_name, /* * Search the cti list to add an associated CTI into the supplied CS device * This will set the association if CTI declared before the CS device. - * (called from coresight_register() with coresight_mutex locked). + * (called from coresight_register() without coresight_mutex locked). */ static void cti_add_assoc_to_csdev(struct coresight_device *csdev) { @@ -569,7 +569,8 @@ static void cti_add_assoc_to_csdev(struct coresight_device *csdev) * if we found a matching csdev then update the ECT * association pointer for the device with this CTI. */ - csdev->ect_dev = ect_item->csdev; + coresight_set_assoc_ectdev_mutex(csdev->ect_dev, + ect_item->csdev); break; } } From 665c157e0204176023860b51a46528ba0ba62c33 Mon Sep 17 00:00:00 2001 From: James Clark Date: Wed, 5 Oct 2022 14:14:52 +0100 Subject: [PATCH 2/2] coresight: cti: Fix hang in cti_disable_hw() cti_enable_hw() and cti_disable_hw() are called from an atomic context so shouldn't use runtime PM because it can result in a sleep when communicating with firmware. Since commit 3c6656337852 ("Revert "firmware: arm_scmi: Add clock management to the SCMI power domain""), this causes a hang on Juno when running the Perf Coresight tests or running this command: perf record -e cs_etm//u -- ls This was also missed until the revert commit because pm_runtime_put() was called with the wrong device until commit 692c9a499b28 ("coresight: cti: Correct the parameter for pm_runtime_put") With lock and scheduler debugging enabled the following is output: coresight cti_sys0: cti_enable_hw -- dev:cti_sys0 parent: 20020000.cti BUG: sleeping function called from invalid context at drivers/base/power/runtime.c:1151 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 330, name: perf-exec preempt_count: 2, expected: 0 RCU nest depth: 0, expected: 0 INFO: lockdep is turned off. irq event stamp: 0 hardirqs last enabled at (0): [<0000000000000000>] 0x0 hardirqs last disabled at (0): [] copy_process+0xa0c/0x1948 softirqs last enabled at (0): [] copy_process+0xa0c/0x1948 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 3 PID: 330 Comm: perf-exec Not tainted 6.0.0-00053-g042116d99298 #7 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Sep 13 2022 Call trace: dump_backtrace+0x134/0x140 show_stack+0x20/0x58 dump_stack_lvl+0x8c/0xb8 dump_stack+0x18/0x34 __might_resched+0x180/0x228 __might_sleep+0x50/0x88 __pm_runtime_resume+0xac/0xb0 cti_enable+0x44/0x120 coresight_control_assoc_ectdev+0xc0/0x150 coresight_enable_path+0xb4/0x288 etm_event_start+0x138/0x170 etm_event_add+0x48/0x70 event_sched_in.isra.122+0xb4/0x280 merge_sched_in+0x1fc/0x3d0 visit_groups_merge.constprop.137+0x16c/0x4b0 ctx_sched_in+0x114/0x1f0 perf_event_sched_in+0x60/0x90 ctx_resched+0x68/0xb0 perf_event_exec+0x138/0x508 begin_new_exec+0x52c/0xd40 load_elf_binary+0x6b8/0x17d0 bprm_execve+0x360/0x7f8 do_execveat_common.isra.47+0x218/0x238 __arm64_sys_execve+0x48/0x60 invoke_syscall+0x4c/0x110 el0_svc_common.constprop.4+0xfc/0x120 do_el0_svc+0x34/0xc0 el0_svc+0x40/0x98 el0t_64_sync_handler+0x98/0xc0 el0t_64_sync+0x170/0x174 Fix the issue by removing the runtime PM calls completely. They are not needed here because it must have already been done when building the path for a trace. Fixes: 835d722ba10a ("coresight: cti: Initial CoreSight CTI Driver") Reported-by: Aishwarya TCV Reported-by: Cristian Marussi Suggested-by: Suzuki Poulose Signed-off-by: James Clark Reviewed-by: Mike Leach Tested-by: Mike Leach Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20221005131452.1506328-1-james.clark@arm.com --- drivers/hwtracing/coresight/coresight-cti-core.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index 1be92342b5b98..4a02ae23d3a06 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -94,7 +94,6 @@ static int cti_enable_hw(struct cti_drvdata *drvdata) unsigned long flags; int rc = 0; - pm_runtime_get_sync(dev->parent); spin_lock_irqsave(&drvdata->spinlock, flags); /* no need to do anything if enabled or unpowered*/ @@ -119,7 +118,6 @@ static int cti_enable_hw(struct cti_drvdata *drvdata) /* cannot enable due to error */ cti_err_not_enabled: spin_unlock_irqrestore(&drvdata->spinlock, flags); - pm_runtime_put(dev->parent); return rc; } @@ -175,7 +173,6 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) coresight_disclaim_device_unlocked(csdev); CS_LOCK(drvdata->base); spin_unlock(&drvdata->spinlock); - pm_runtime_put(dev->parent); return 0; /* not disabled this call */