From 2edc66e22ba1af33020ff8b75fe1a2b055cdb73f Mon Sep 17 00:00:00 2001 From: Oded Gabbay <oded.gabbay@gmail.com> Date: Fri, 3 Jul 2020 19:28:54 +0300 Subject: [PATCH 01/10] habanalabs: block WREG_BULK packet on PDMA WREG_BULK is a special packet that has a variable length. Therefore, we can't parse it when validating CBs that go to the PCI DMA queue. In case the user needs to use it, it can put multiple WREG32 packets instead. Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai> --- drivers/misc/habanalabs/gaudi/gaudi.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 834470d10b46b..e222065271647 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -3865,6 +3865,12 @@ static int gaudi_validate_cb(struct hl_device *hdev, rc = -EPERM; break; + case PACKET_WREG_BULK: + dev_err(hdev->dev, + "User not allowed to use WREG_BULK\n"); + rc = -EPERM; + break; + case PACKET_LOAD_AND_EXE: rc = gaudi_validate_load_and_exe_pkt(hdev, parser, (struct packet_load_and_exe *) user_pkt); @@ -3880,7 +3886,6 @@ static int gaudi_validate_cb(struct hl_device *hdev, break; case PACKET_WREG_32: - case PACKET_WREG_BULK: case PACKET_MSG_LONG: case PACKET_MSG_SHORT: case PACKET_REPEAT: From e38bfd30e08802d9661efffb8c048bd53a3acfc4 Mon Sep 17 00:00:00 2001 From: Oded Gabbay <oded.gabbay@gmail.com> Date: Fri, 3 Jul 2020 20:46:12 +0300 Subject: [PATCH 02/10] habanalabs: set clock gating per engine For debugging purposes, we need to allow the root user better control of the clock gating feature of the DMA and compute engines. Therefore, change the clock gating debugfs interface to be bitmask instead of true/false. Each bit represents a different engine, according to gaudi_engine_id enum. See debugfs documentation for more details. 
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai> --- .../ABI/testing/debugfs-driver-habanalabs | 11 +- drivers/misc/habanalabs/debugfs.c | 17 +-- drivers/misc/habanalabs/device.c | 2 +- drivers/misc/habanalabs/gaudi/gaudi.c | 112 ++++++++++++------ drivers/misc/habanalabs/goya/goya.c | 8 +- drivers/misc/habanalabs/habanalabs.h | 13 +- drivers/misc/habanalabs/habanalabs_drv.c | 2 +- 7 files changed, 103 insertions(+), 62 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index f6d9c2a8d5280..2e9ae311e02d2 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -16,7 +16,16 @@ Description: Allow the root user to disable/enable in runtime the clock gating mechanism in Gaudi. Due to how Gaudi is built, the clock gating needs to be disabled in order to access the registers of the TPC and MME engines. This is sometimes needed - during debug by the user and hence the user needs this option + during debug by the user and hence the user needs this option. + The user can supply a bitmask value, each bit represents + a different engine to disable/enable its clock gating feature. + The bitmask is composed of 20 bits: + 0 - 7 : DMA channels + 8 - 11 : MME engines + 12 - 19 : TPC engines + The bit's location of a specific engine can be determined + using (1 << GAUDI_ENGINE_ID_*). 
GAUDI_ENGINE_ID_* values + are defined in uapi habanalabs.h file in enum gaudi_engine_id What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers Date: Jan 2019 diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index fc4372c18ce20..136b8f6fa0b3f 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -981,7 +981,7 @@ static ssize_t hl_clk_gate_read(struct file *f, char __user *buf, if (*ppos) return 0; - sprintf(tmp_buf, "%d\n", hdev->clock_gating); + sprintf(tmp_buf, "0x%llx\n", hdev->clock_gating_mask); rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf, strlen(tmp_buf) + 1); @@ -993,7 +993,7 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, { struct hl_dbg_device_entry *entry = file_inode(f)->i_private; struct hl_device *hdev = entry->hdev; - u32 value; + u64 value; ssize_t rc; if (atomic_read(&hdev->in_reset)) { @@ -1002,19 +1002,12 @@ static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf, return 0; } - rc = kstrtouint_from_user(buf, count, 10, &value); + rc = kstrtoull_from_user(buf, count, 16, &value); if (rc) return rc; - if (value) { - hdev->clock_gating = 1; - if (hdev->asic_funcs->enable_clock_gating) - hdev->asic_funcs->enable_clock_gating(hdev); - } else { - if (hdev->asic_funcs->disable_clock_gating) - hdev->asic_funcs->disable_clock_gating(hdev); - hdev->clock_gating = 0; - } + hdev->clock_gating_mask = value; + hdev->asic_funcs->set_clock_gating(hdev); return count; } diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 2b38a119704c4..59608d1bac880 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -608,7 +608,7 @@ int hl_device_set_debug_mode(struct hl_device *hdev, bool enable) hdev->in_debug = 0; if (!hdev->hard_reset_pending) - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); goto out; } diff --git 
a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e222065271647..9d6aebef88542 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -98,6 +98,11 @@ #define GAUDI_ARB_WDT_TIMEOUT 0x1000000 +#define GAUDI_CLK_GATE_DEBUGFS_MASK (\ + BIT(GAUDI_ENGINE_ID_MME_0) |\ + BIT(GAUDI_ENGINE_ID_MME_2) |\ + GENMASK_ULL(GAUDI_ENGINE_ID_TPC_7, GAUDI_ENGINE_ID_TPC_0)) + static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -106,14 +111,14 @@ static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { }; static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = { - [GAUDI_PCI_DMA_1] = 0, - [GAUDI_PCI_DMA_2] = 1, - [GAUDI_PCI_DMA_3] = 5, - [GAUDI_HBM_DMA_1] = 2, - [GAUDI_HBM_DMA_2] = 3, - [GAUDI_HBM_DMA_3] = 4, - [GAUDI_HBM_DMA_4] = 6, - [GAUDI_HBM_DMA_5] = 7 + [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0, + [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1, + [GAUDI_PCI_DMA_3] = GAUDI_ENGINE_ID_DMA_5, + [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2, + [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3, + [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4, + [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_6, + [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_7 }; static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = { @@ -1819,7 +1824,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev) gaudi_init_rate_limiter(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); for (tpc_id = 0, tpc_offset = 0; tpc_id < TPC_NUMBER_OF_ENGINES; @@ -2531,46 +2536,55 @@ static void gaudi_tpc_stall(struct hl_device *hdev) WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); } -static void gaudi_enable_clock_gating(struct hl_device *hdev) +static void gaudi_set_clock_gating(struct hl_device *hdev) { struct gaudi_device *gaudi = 
hdev->asic_specific; u32 qman_offset; int i; - if (!hdev->clock_gating) - return; - - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) - return; - /* In case we are during debug session, don't enable the clock gate * as it may interfere */ if (hdev->in_debug) return; - for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_PCI_DMA_1, qman_offset = 0 ; i < GAUDI_HBM_DMA_1 ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_UPPER_CP_CGM_PWR_GATE_EN); } - for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { + for (i = GAUDI_HBM_DMA_1 ; i < GAUDI_DMA_MAX ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(gaudi_dma_assignment[i])))) + continue; + qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmDMA0_QM_CGM_CFG + qman_offset, QMAN_COMMON_CP_CGM_PWR_GATE_EN); } - WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME0_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); - WREG32(mmMME2_QM_CGM_CFG, - QMAN_COMMON_CP_CGM_PWR_GATE_EN); + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_0))) { + WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME0_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } + + if (hdev->clock_gating_mask & (BIT_ULL(GAUDI_ENGINE_ID_MME_2))) { + WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN); + WREG32(mmMME2_QM_CGM_CFG, QMAN_COMMON_CP_CGM_PWR_GATE_EN); + } for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { + if (!(hdev->clock_gating_mask & + (BIT_ULL(GAUDI_ENGINE_ID_TPC_0 + i)))) + continue; + WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN); WREG32(mmTPC0_QM_CGM_CFG + qman_offset, @@ -2663,7 +2677,7 @@ static void 
gaudi_halt_engines(struct hl_device *hdev, bool hard_reset) gaudi_stop_hbm_dma_qmans(hdev); gaudi_stop_pci_dma_qmans(hdev); - gaudi_disable_clock_gating(hdev); + hdev->asic_funcs->disable_clock_gating(hdev); msleep(wait_timeout_ms); @@ -3003,7 +3017,7 @@ static int gaudi_hw_init(struct hl_device *hdev) gaudi_init_tpc_qmans(hdev); - gaudi_enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); gaudi_enable_timestamp(hdev); @@ -3112,7 +3126,9 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset) HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | - HW_CAP_HBM_SCRAMBLER); + HW_CAP_HBM_SCRAMBLER | + HW_CAP_CLK_GATE); + memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); } @@ -4526,13 +4542,18 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; } else { *val = RREG32(addr - CFG_BASE); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4568,13 +4589,18 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) int rc = 0; if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; } else { WREG32(addr - CFG_BASE, val); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { writel(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4610,7 +4636,11 @@ static int 
gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't read register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4620,6 +4650,7 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) *val = (((u64) val_h) << 32) | val_l; } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + @@ -4656,7 +4687,11 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) int rc = 0; if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) { + + if ((gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) && + (hdev->clock_gating_mask & + GAUDI_CLK_GATE_DEBUGFS_MASK)) { + dev_err_ratelimited(hdev->dev, "Can't write register - clock gating is enabled!\n"); rc = -EFAULT; @@ -4665,6 +4700,7 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); } + } else if ((addr >= SRAM_BASE_ADDR) && (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + @@ -4886,7 +4922,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } @@ -5267,7 +5303,7 @@ static void gaudi_print_ecc_info_generic(struct hl_device *hdev, } if (disable_clock_gating) { - hdev->asic_funcs->enable_clock_gating(hdev); + 
hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); } } @@ -5754,7 +5790,7 @@ static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, /* Clear interrupts */ WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6270,7 +6306,7 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask, if (s) seq_puts(s, "\n"); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); @@ -6371,7 +6407,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, dev_err(hdev->dev, "Timeout while waiting for TPC%d icache prefetch\n", tpc_id); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); return -EIO; } @@ -6400,7 +6436,7 @@ static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 1000, kernel_timeout); - hdev->asic_funcs->enable_clock_gating(hdev); + hdev->asic_funcs->set_clock_gating(hdev); mutex_unlock(&gaudi->clk_gate_mutex); if (rc) { @@ -6741,7 +6777,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, .send_heartbeat = gaudi_send_heartbeat, - .enable_clock_gating = gaudi_enable_clock_gating, + .set_clock_gating = gaudi_set_clock_gating, .disable_clock_gating = gaudi_disable_clock_gating, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 0d2952bb58dfb..83f0c70f140b1 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5028,14 +5028,14 @@ int goya_armcp_info_get(struct hl_device *hdev) return 0; } -static void 
goya_enable_clock_gating(struct hl_device *hdev) +static void goya_set_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static void goya_disable_clock_gating(struct hl_device *hdev) { - + /* clock gating not supported in Goya */ } static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask, @@ -5259,7 +5259,7 @@ static const struct hl_asic_funcs goya_funcs = { .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, .send_heartbeat = goya_send_heartbeat, - .enable_clock_gating = goya_enable_clock_gating, + .set_clock_gating = goya_set_clock_gating, .disable_clock_gating = goya_disable_clock_gating, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 1ecdcf8b763a2..dee5cc25fe5b6 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -578,8 +578,9 @@ enum hl_pll_frequency { * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with * ASID-VA-size mask. * @send_heartbeat: send is-alive packet to ArmCP and verify response. - * @enable_clock_gating: enable clock gating for reducing power consumption. - * @disable_clock_gating: disable clock for accessing registers on HBW. + * @set_clock_gating: enable/disable clock gating per engine according to + * clock gating mask in hdev + * @disable_clock_gating: disable clock gating completely * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @soft_reset_late_init: perform certain actions needed after soft reset. 
@@ -680,7 +681,7 @@ struct hl_asic_funcs { int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); - void (*enable_clock_gating)(struct hl_device *hdev); + void (*set_clock_gating)(struct hl_device *hdev); void (*disable_clock_gating)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, void *data); bool (*is_device_idle)(struct hl_device *hdev, u32 *mask, @@ -1398,6 +1399,9 @@ struct hl_device_idle_busy_ts { * @max_power: the max power of the device, as configured by the sysadmin. This * value is saved so in case of hard-reset, the driver will restore * this value and update the F/W after the re-initialization + * @clock_gating_mask: clock gating enable bitmask. Each bit represents a + * different engine. See debugfs-driver-habanalabs for + * details. * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @cs_active_cnt: number of active command submissions on this device (active @@ -1425,7 +1429,6 @@ struct hl_device_idle_busy_ts { * @init_done: is the initialization of the device done. * @mmu_enable: is MMU enabled. * @mmu_huge_page_opt: is MMU huge pages optimization enabled. - * @clock_gating: is clock gating enabled. * @device_cpu_disabled: is the device CPU disabled (due to timeouts) * @dma_mask: the dma mask that was set for this device * @in_debug: is device under debug. 
This, together with fpriv_list, enforces @@ -1493,6 +1496,7 @@ struct hl_device { atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; + u64 clock_gating_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; int cs_active_cnt; @@ -1514,7 +1518,6 @@ struct hl_device { u8 dram_default_page_mapping; u8 pmmu_huge_range; u8 init_done; - u8 clock_gating; u8 device_cpu_disabled; u8 dma_mask; u8 in_debug; diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c index 8652c7e5d7f10..22716da9f85fc 100644 --- a/drivers/misc/habanalabs/habanalabs_drv.c +++ b/drivers/misc/habanalabs/habanalabs_drv.c @@ -232,7 +232,7 @@ static void set_driver_behavior_per_device(struct hl_device *hdev) hdev->fw_loading = 1; hdev->cpu_queues_enable = 1; hdev->heartbeat = 1; - hdev->clock_gating = 1; + hdev->clock_gating_mask = ULONG_MAX; hdev->reset_pcilink = 0; hdev->axi_drain = 0; From 788cacf308871db0a619952321bedfec8f1773e2 Mon Sep 17 00:00:00 2001 From: Oded Gabbay <oded.gabbay@gmail.com> Date: Tue, 7 Jul 2020 17:30:13 +0300 Subject: [PATCH 03/10] habanalabs: set 4s timeout for message to device CPU We see that sometimes the CPU in GOYA and GAUDI is occupied by the power/thermal loop and can't answer requests from the driver fast enough. Therefore, to avoid false notifications on timeouts, increase the timeout to 4 seconds on each message sent to the device CPU. 
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Reviewed-by: Tomer Tayar <ttayar@habana.ai> --- drivers/misc/habanalabs/debugfs.c | 6 +++--- drivers/misc/habanalabs/firmware_if.c | 10 +++++----- drivers/misc/habanalabs/gaudi/gaudi.c | 4 ++++ drivers/misc/habanalabs/goya/goya.c | 12 ++++++++---- drivers/misc/habanalabs/habanalabs.h | 6 +++++- drivers/misc/habanalabs/hwmon.c | 19 +++++++++---------- drivers/misc/habanalabs/sysfs.c | 11 ++++------- 7 files changed, 38 insertions(+), 30 deletions(-) diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 136b8f6fa0b3f..0bc036e01ee8d 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -36,7 +36,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.i2c_reg = i2c_reg; rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, (long *) val); + 0, (long *) val); if (rc) dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc); @@ -63,7 +63,7 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, pkt.value = cpu_to_le64(val); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc); @@ -87,7 +87,7 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state) pkt.value = cpu_to_le64(state); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc); diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c index baf790cf4b787..d27841cb5bcb3 100644 --- a/drivers/misc/habanalabs/firmware_if.c +++ b/drivers/misc/habanalabs/firmware_if.c @@ -61,7 +61,7 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode) pkt.ctl = cpu_to_le32(opcode 
<< ARMCP_PKT_CTL_OPCODE_SHIFT); return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, - sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL); + sizeof(pkt), 0, NULL); } int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, @@ -144,7 +144,7 @@ int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type) pkt.value = cpu_to_le64(event_type); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); @@ -183,7 +183,7 @@ int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr, ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, - total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result); + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -204,7 +204,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev) test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt, - sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(test_pkt), 0, &result); if (!rc) { if (result != ARMCP_PACKET_FENCE_VAL) @@ -248,7 +248,7 @@ int hl_fw_send_heartbeat(struct hl_device *hdev) hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt, - sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result); + sizeof(hb_pkt), 0, &result); if ((rc) || (result != ARMCP_PACKET_FENCE_VAL)) rc = -EIO; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 9d6aebef88542..637a9d608707f 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -80,6 +80,7 @@ #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define 
GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9 @@ -3479,6 +3480,9 @@ static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, return 0; } + if (!timeout) + timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 83f0c70f140b1..88460b2138d88 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -88,6 +88,7 @@ #define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100) #define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30) #define GOYA_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */ +#define GOYA_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */ #define GOYA_QMAN0_FENCE_VAL 0xD169B243 @@ -2830,6 +2831,9 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len, return 0; } + if (!timeout) + timeout = GOYA_MSG_TO_CPU_TIMEOUT_USEC; + return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len, timeout, result); } @@ -4431,8 +4435,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr, pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY << ARMCP_PKT_CTL_OPCODE_SHIFT); - rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size, - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt, + total_pkt_size, 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask IRQ array\n"); @@ -4464,8 +4468,8 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type) ARMCP_PKT_CTL_OPCODE_SHIFT); pkt.value = cpu_to_le64(event_type); - rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - HL_DEVICE_TIMEOUT_USEC, &result); + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), + 0, &result); if (rc) dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type); diff --git 
a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index dee5cc25fe5b6..194d833526964 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -588,7 +588,11 @@ enum hl_pll_frequency { * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. - * @send_cpu_message: send buffer to ArmCP. + * @send_cpu_message: send message to F/W. If the message times out, the + * driver will eventually reset the device. The timeout can + * be determined by the calling function or it can be 0 and + * then the timeout is the default timeout for the specific + * ASIC * @get_hw_state: retrieve the H/W state * @pci_bars_map: Map PCI BARs. * @set_dram_bar_base: Set DRAM BAR to map specific device address. Returns diff --git a/drivers/misc/habanalabs/hwmon.c b/drivers/misc/habanalabs/hwmon.c index 8c6cd77e6af6b..b997336fa75fc 100644 --- a/drivers/misc/habanalabs/hwmon.c +++ b/drivers/misc/habanalabs/hwmon.c @@ -10,7 +10,6 @@ #include <linux/pci.h> #include <linux/hwmon.h> -#define SENSORS_PKT_TIMEOUT 1000000 /* 1s */ #define HWMON_NR_SENSOR_TYPES (hwmon_pwm + 1) int hl_build_hwmon_channel_info(struct hl_device *hdev, @@ -323,7 +322,7 @@ int hl_get_temperature(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -350,7 +349,7 @@ int hl_set_temperature(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -374,7 +373,7 @@ int hl_get_voltage(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -400,7 +399,7 @@ 
int hl_get_current(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -426,7 +425,7 @@ int hl_get_fan_speed(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -452,7 +451,7 @@ int hl_get_pwm_info(struct hl_device *hdev, pkt.type = __cpu_to_le16(attr); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, value); + 0, value); if (rc) { dev_err(hdev->dev, @@ -479,7 +478,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -502,7 +501,7 @@ int hl_set_voltage(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -527,7 +526,7 @@ int hl_set_current(struct hl_device *hdev, pkt.value = __cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SENSORS_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, diff --git a/drivers/misc/habanalabs/sysfs.c b/drivers/misc/habanalabs/sysfs.c index 5d78d5e1c7826..70b6b1863c2ef 100644 --- a/drivers/misc/habanalabs/sysfs.c +++ b/drivers/misc/habanalabs/sysfs.c @@ -9,9 +9,6 @@ #include <linux/pci.h> -#define SET_CLK_PKT_TIMEOUT 1000000 /* 1s */ -#define SET_PWR_PKT_TIMEOUT 1000000 /* 1s */ - long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct armcp_packet pkt; @@ -29,7 +26,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) pkt.pll_index = 
cpu_to_le32(pll_index); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, @@ -54,7 +51,7 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) pkt.value = cpu_to_le64(freq); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_CLK_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, @@ -74,7 +71,7 @@ u64 hl_get_max_power(struct hl_device *hdev) ARMCP_PKT_CTL_OPCODE_SHIFT); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, &result); + 0, &result); if (rc) { dev_err(hdev->dev, "Failed to get max power, error %d\n", rc); @@ -96,7 +93,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value) pkt.value = cpu_to_le64(value); rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), - SET_PWR_PKT_TIMEOUT, NULL); + 0, NULL); if (rc) dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); From e19485dc7a0d210b428a249c0595769bd495fb71 Mon Sep 17 00:00:00 2001 From: Xu Yilun <yilun.xu@intel.com> Date: Mon, 13 Jul 2020 14:10:02 +0800 Subject: [PATCH 04/10] fpga: dfl: pci: reduce the scope of variable 'ret' This is to fix lkp cppcheck warnings: drivers/fpga/dfl-pci.c:230:6: warning: The scope of the variable 'ret' can be reduced. [variableScope] int ret = 0; ^ drivers/fpga/dfl-pci.c:230:10: warning: Variable 'ret' is assigned a value that is never used. 
[unreadVariable] int ret = 0; ^ Fixes: 3c2760b78f90 ("fpga: dfl: pci: fix return value of cci_pci_sriov_configure") Reported-by: kbuild test robot <lkp@intel.com> Signed-off-by: Xu Yilun <yilun.xu@intel.com> Acked-by: Wu Hao <hao.wu@intel.com> Reviewed-by: Tom Rix <trix@redhat.com> Signed-off-by: Moritz Fischer <mdf@kernel.org> --- drivers/fpga/dfl-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/dfl-pci.c b/drivers/fpga/dfl-pci.c index 538755062ab7c..a78c409bf2c44 100644 --- a/drivers/fpga/dfl-pci.c +++ b/drivers/fpga/dfl-pci.c @@ -227,7 +227,6 @@ static int cci_pci_sriov_configure(struct pci_dev *pcidev, int num_vfs) { struct cci_drvdata *drvdata = pci_get_drvdata(pcidev); struct dfl_fpga_cdev *cdev = drvdata->cdev; - int ret = 0; if (!num_vfs) { /* @@ -239,6 +238,8 @@ static int cci_pci_sriov_configure(struct pci_dev *pcidev, int num_vfs) dfl_fpga_cdev_config_ports_pf(cdev); } else { + int ret; + /* * before enable SRIOV, put released ports into VF access mode * first of all. From 8614afd689df59d9ce019439389be20bd788a897 Mon Sep 17 00:00:00 2001 From: Matthew Gerlach <matthew.gerlach@linux.intel.com> Date: Mon, 13 Jul 2020 14:10:03 +0800 Subject: [PATCH 05/10] fpga: dfl: fix bug in port reset handshake When putting the port in reset, driver must wait for the soft reset acknowledgment bit instead of the soft reset bit. 
Fixes: 47c1b19c160f ("fpga: dfl: afu: add port ops support") Signed-off-by: Matthew Gerlach <matthew.gerlach@linux.intel.com> Signed-off-by: Xu Yilun <yilun.xu@intel.com> Acked-by: Wu Hao <hao.wu@intel.com> Reviewed-by: Tom Rix <trix@redhat.com> Signed-off-by: Moritz Fischer <mdf@kernel.org> --- drivers/fpga/dfl-afu-main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/fpga/dfl-afu-main.c b/drivers/fpga/dfl-afu-main.c index b0c31789a9096..3fa2c59921733 100644 --- a/drivers/fpga/dfl-afu-main.c +++ b/drivers/fpga/dfl-afu-main.c @@ -83,7 +83,8 @@ int __afu_port_disable(struct platform_device *pdev) * on this port and minimum soft reset pulse width has elapsed. * Driver polls port_soft_reset_ack to determine if reset done by HW. */ - if (readq_poll_timeout(base + PORT_HDR_CTRL, v, v & PORT_CTRL_SFTRST, + if (readq_poll_timeout(base + PORT_HDR_CTRL, v, + v & PORT_CTRL_SFTRST_ACK, RST_POLL_INVL, RST_POLL_TIMEOUT)) { dev_err(&pdev->dev, "timeout, fail to reset device\n"); return -ETIMEDOUT; From cea7a0449ea3fa4883bf5dc8397f000d6b67d6cd Mon Sep 17 00:00:00 2001 From: Oded Gabbay <oded.gabbay@gmail.com> Date: Sun, 12 Jul 2020 23:34:57 +0300 Subject: [PATCH 06/10] habanalabs: prevent possible out-of-bounds array access Queue index is received from the user. Therefore, we must validate it before using it to access the queue props array.
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> Reviewed-by: Tomer Tayar <ttayar@habana.ai> --- drivers/misc/habanalabs/command_submission.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index b0f62cbbdc878..f3a8f113865d2 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -499,11 +499,19 @@ static int validate_queue_index(struct hl_device *hdev, struct asic_fixed_properties *asic = &hdev->asic_prop; struct hw_queue_properties *hw_queue_prop; + /* This must be checked here to prevent out-of-bounds access to + * hw_queues_props array + */ + if (chunk->queue_index >= HL_MAX_QUEUES) { + dev_err(hdev->dev, "Queue index %d is invalid\n", + chunk->queue_index); + return -EINVAL; + } + hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; - if ((chunk->queue_index >= HL_MAX_QUEUES) || - (hw_queue_prop->type == QUEUE_TYPE_NA)) { - dev_err(hdev->dev, "Queue index %d is invalid\n", + if (hw_queue_prop->type == QUEUE_TYPE_NA) { + dev_err(hdev->dev, "Queue index %d is not applicable\n", chunk->queue_index); return -EINVAL; } From f867c771f98891841c217fa8459244ed0dd28921 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp> Date: Fri, 17 Jul 2020 00:12:15 +0900 Subject: [PATCH 07/10] binder: Don't use mmput() from shrinker function. syzbot is reporting that mmput() from shrinker function has a risk of deadlock [1], for delayed_uprobe_add() from update_ref_ctr() calls kzalloc(GFP_KERNEL) with delayed_uprobe_lock held, and uprobe_clear_state() from __mmput() also holds delayed_uprobe_lock. Commit a1b2289cef92ef0e ("android: binder: drop lru lock in isolate callback") replaced mmput() with mmput_async() in order to avoid sleeping with spinlock held. But this patch replaces mmput() with mmput_async() in order not to start __mmput() from shrinker context. 
[1] https://syzkaller.appspot.com/bug?id=bc9e7303f537c41b2b0cc2dfcea3fc42964c2d45 Reported-by: syzbot <syzbot+1068f09c44d151250c33@syzkaller.appspotmail.com> Reported-by: syzbot <syzbot+e5344baa319c9a96edec@syzkaller.appspotmail.com> Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Reviewed-by: Michal Hocko <mhocko@suse.com> Acked-by: Todd Kjos <tkjos@google.com> Acked-by: Christian Brauner <christian.brauner@ubuntu.com> Cc: stable <stable@vger.kernel.org> Link: https://lore.kernel.org/r/4ba9adb2-43f5-2de0-22de-f6075c1fab50@i-love.sakura.ne.jp Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/android/binder_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 42c672f1584e9..cbe6aa77d50d1 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -947,7 +947,7 @@ enum lru_status binder_alloc_free_page(struct list_head *item, trace_binder_unmap_user_end(alloc, index); } mmap_read_unlock(mm); - mmput(mm); + mmput_async(mm); trace_binder_unmap_kernel_start(alloc, index); From b34e7e298d7a5ed76b3aa327c240c29f1ef6dd22 Mon Sep 17 00:00:00 2001 From: Eric Biggers <ebiggers@google.com> Date: Wed, 15 Jul 2020 23:05:53 -0700 Subject: [PATCH 08/10] /dev/mem: Add missing memory barriers for devmem_inode WRITE_ONCE() isn't the correct way to publish a pointer to a data structure, since it doesn't include a write memory barrier. Therefore other tasks may see that the pointer has been set but not see that the pointed-to memory has finished being initialized yet. Instead a primitive with "release" semantics is needed. Use smp_store_release() for this. The use of READ_ONCE() on the read side is still potentially correct if there's no control dependency, i.e. if all memory being "published" is transitively reachable via the pointer itself. But this pairing is somewhat confusing and error-prone. 
So just upgrade the read side to smp_load_acquire() so that it clearly pairs with smp_store_release(). Cc: Arnd Bergmann <arnd@arndb.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Matthew Wilcox <willy@infradead.org> Cc: Russell King <linux@arm.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Fixes: 3234ac664a87 ("/dev/mem: Revoke mappings when a driver claims the region") Signed-off-by: Eric Biggers <ebiggers@google.com> Cc: stable <stable@vger.kernel.org> Acked-by: Dan Williams <dan.j.williams@intel.com> Link: https://lore.kernel.org/r/20200716060553.24618-1-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/char/mem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 934c92dcb9ab7..687d4af6945d3 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -814,7 +814,8 @@ static struct inode *devmem_inode; #ifdef CONFIG_IO_STRICT_DEVMEM void revoke_devmem(struct resource *res) { - struct inode *inode = READ_ONCE(devmem_inode); + /* pairs with smp_store_release() in devmem_init_inode() */ + struct inode *inode = smp_load_acquire(&devmem_inode); /* * Check that the initialization has completed. Losing the race @@ -1028,8 +1029,11 @@ static int devmem_init_inode(void) return rc; } - /* publish /dev/mem initialized */ - WRITE_ONCE(devmem_inode, inode); + /* + * Publish /dev/mem initialized. + * Pairs with smp_load_acquire() in revoke_devmem(). + */ + smp_store_release(&devmem_inode, inode); return 0; } From 91b44981a2316e7b00574d32dec4fae356444dcf Mon Sep 17 00:00:00 2001 From: Georgi Djakov <georgi.djakov@linaro.org> Date: Thu, 23 Jul 2020 11:37:34 +0300 Subject: [PATCH 09/10] interconnect: Do not skip aggregation for disabled paths When an interconnect path is being disabled, currently we don't aggregate the requests for it afterwards. 
But the re-aggregation step shouldn't be skipped, as it may leave the nodes with outdated bandwidth data. This outdated data may actually keep the path still enabled and prevent the device from going into lower power states. Reported-by: Atul Dhudase <adhudase@codeaurora.org> Fixes: 7d374b209083 ("interconnect: Add helpers for enabling/disabling a path") Reviewed-by: Sibi Sankar <sibis@codeaurora.org> Tested-by: Atul Dhudase <adhudase@codeaurora.org> Reviewed-by: Atul Dhudase <adhudase@codeaurora.org> Link: https://lore.kernel.org/r/20200721120740.3436-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org> Link: https://lore.kernel.org/r/20200723083735.5616-2-georgi.djakov@linaro.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/interconnect/core.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index e5f9987445011..9e1ab701785c7 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -243,6 +243,7 @@ static int aggregate_requests(struct icc_node *node) { struct icc_provider *p = node->provider; struct icc_req *r; + u32 avg_bw, peak_bw; node->avg_bw = 0; node->peak_bw = 0; @@ -251,9 +252,14 @@ static int aggregate_requests(struct icc_node *node) p->pre_aggregate(node); hlist_for_each_entry(r, &node->req_list, req_node) { - if (!r->enabled) - continue; - p->aggregate(node, r->tag, r->avg_bw, r->peak_bw, + if (r->enabled) { + avg_bw = r->avg_bw; + peak_bw = r->peak_bw; + } else { + avg_bw = 0; + peak_bw = 0; + } + p->aggregate(node, r->tag, avg_bw, peak_bw, &node->avg_bw, &node->peak_bw); } From 92d232d176041db5b033dd7b7f7f2cf343f82237 Mon Sep 17 00:00:00 2001 From: Georgi Djakov <georgi.djakov@linaro.org> Date: Thu, 23 Jul 2020 11:37:35 +0300 Subject: [PATCH 10/10] interconnect: msm8916: Fix buswidth of pcnoc_s nodes The buswidth of the pcnoc_s_* nodes is actually not 8, but 4 bytes. Let's fix it. 
Reported-by: Jun Nie <jun.nie@linaro.org> Reviewed-by: Mike Tipton <mdtipton@codeaurora.org> Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver") Link: https://lore.kernel.org/r/20200709130004.12462-1-georgi.djakov@linaro.org Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org> Cc: stable <stable@vger.kernel.org> Link: https://lore.kernel.org/r/20200723083735.5616-3-georgi.djakov@linaro.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/interconnect/qcom/msm8916.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/interconnect/qcom/msm8916.c b/drivers/interconnect/qcom/msm8916.c index e94f3c5228b7c..42c6c55816626 100644 --- a/drivers/interconnect/qcom/msm8916.c +++ b/drivers/interconnect/qcom/msm8916.c @@ -197,13 +197,13 @@ DEFINE_QNODE(pcnoc_int_0, MSM8916_PNOC_INT_0, 8, -1, -1, MSM8916_PNOC_SNOC_MAS, DEFINE_QNODE(pcnoc_int_1, MSM8916_PNOC_INT_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS); DEFINE_QNODE(pcnoc_m_0, MSM8916_PNOC_MAS_0, 8, -1, -1, MSM8916_PNOC_INT_0); DEFINE_QNODE(pcnoc_m_1, MSM8916_PNOC_MAS_1, 8, -1, -1, MSM8916_PNOC_SNOC_MAS); -DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 8, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS); -DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 8, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG); -DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 8, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB); -DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 8, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG); -DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 8, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG); -DEFINE_QNODE(pcnoc_s_8, 
MSM8916_PNOC_SLV_8, 8, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1); -DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 8, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG); +DEFINE_QNODE(pcnoc_s_0, MSM8916_PNOC_SLV_0, 4, -1, -1, MSM8916_SLAVE_CLK_CTL, MSM8916_SLAVE_TLMM, MSM8916_SLAVE_TCSR, MSM8916_SLAVE_SECURITY, MSM8916_SLAVE_MSS); +DEFINE_QNODE(pcnoc_s_1, MSM8916_PNOC_SLV_1, 4, -1, -1, MSM8916_SLAVE_IMEM_CFG, MSM8916_SLAVE_CRYPTO_0_CFG, MSM8916_SLAVE_MSG_RAM, MSM8916_SLAVE_PDM, MSM8916_SLAVE_PRNG); +DEFINE_QNODE(pcnoc_s_2, MSM8916_PNOC_SLV_2, 4, -1, -1, MSM8916_SLAVE_SPDM, MSM8916_SLAVE_BOOT_ROM, MSM8916_SLAVE_BIMC_CFG, MSM8916_SLAVE_PNOC_CFG, MSM8916_SLAVE_PMIC_ARB); +DEFINE_QNODE(pcnoc_s_3, MSM8916_PNOC_SLV_3, 4, -1, -1, MSM8916_SLAVE_MPM, MSM8916_SLAVE_SNOC_CFG, MSM8916_SLAVE_RBCPR_CFG, MSM8916_SLAVE_QDSS_CFG, MSM8916_SLAVE_DEHR_CFG); +DEFINE_QNODE(pcnoc_s_4, MSM8916_PNOC_SLV_4, 4, -1, -1, MSM8916_SLAVE_VENUS_CFG, MSM8916_SLAVE_CAMERA_CFG, MSM8916_SLAVE_DISPLAY_CFG); +DEFINE_QNODE(pcnoc_s_8, MSM8916_PNOC_SLV_8, 4, -1, -1, MSM8916_SLAVE_USB_HS, MSM8916_SLAVE_SDCC_1, MSM8916_SLAVE_BLSP_1); +DEFINE_QNODE(pcnoc_s_9, MSM8916_PNOC_SLV_9, 4, -1, -1, MSM8916_SLAVE_SDCC_2, MSM8916_SLAVE_LPASS, MSM8916_SLAVE_GRAPHICS_3D_CFG); DEFINE_QNODE(pcnoc_snoc_mas, MSM8916_PNOC_SNOC_MAS, 8, 29, -1, MSM8916_PNOC_SNOC_SLV); DEFINE_QNODE(pcnoc_snoc_slv, MSM8916_PNOC_SNOC_SLV, 8, -1, 45, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC, MSM8916_SNOC_INT_1); DEFINE_QNODE(qdss_int, MSM8916_SNOC_QDSS_INT, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_BIMC);