From ea5ff5d351b520524019f7ff7f9ce418de2dad87 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Fri, 30 Aug 2024 19:26:26 +0000 Subject: [PATCH 01/25] dma-buf: heaps: Fix off-by-one in CMA heap fault handler Until VM_DONTEXPAND was added in commit 1c1914d6e8c6 ("dma-buf: heaps: Don't track CMA dma-buf pages under RssFile") it was possible to obtain a mapping larger than the buffer size via mremap and bypass the overflow check in dma_buf_mmap_internal. When using such a mapping to attempt to fault past the end of the buffer, the CMA heap fault handler also checks the fault offset against the buffer size, but gets the boundary wrong by 1. Fix the boundary check so that we don't read off the end of the pages array and insert an arbitrary page in the mapping. Reported-by: Xingyu Jin Fixes: a5d2d29e24be ("dma-buf: heaps: Move heap-helper logic into the cma_heap implementation") Cc: stable@vger.kernel.org # Applicable >= 5.10. Needs adjustments only for 5.10. Signed-off-by: T.J. Mercier Acked-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20240830192627.2546033-1-tjmercier@google.com --- drivers/dma-buf/heaps/cma_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index c384004b918e2..93be88b805fe7 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -165,7 +165,7 @@ static vm_fault_t cma_heap_vm_fault(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; struct cma_heap_buffer *buffer = vma->vm_private_data; - if (vmf->pgoff > buffer->pagecount) + if (vmf->pgoff >= buffer->pagecount) return VM_FAULT_SIGBUS; return vmf_insert_pfn(vma, vmf->address, page_to_pfn(buffer->pages[vmf->pgoff])); From d3d37f74683e2f16f2635ee265884f7ca69350ae Mon Sep 17 00:00:00 2001 From: Nikita Zhandarovich Date: Thu, 25 Jul 2024 08:59:25 -0700 Subject: [PATCH 02/25] drm/i915/guc: prevent a possible int overflow in wq offsets It may be possible for the sum of the values derived from i915_ggtt_offset() and __get_parent_scratch_offset()/ i915_ggtt_offset() to go over the u32 limit before being assigned to wq offsets of u64 type. Mitigate these issues by expanding one of the right operands to u64 to avoid any overflow issues just in case. Found by Linux Verification Center (linuxtesting.org) with static analysis tool SVACE. Fixes: c2aa552ff09d ("drm/i915/guc: Add multi-lrc context registration") Cc: Matthew Brost Cc: John Harrison Signed-off-by: Nikita Zhandarovich Link: https://patchwork.freedesktop.org/patch/msgid/20240725155925.14707-1-n.zhandarovich@fintech.ru Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi (cherry picked from commit 1f1c1bd56620b80ae407c5790743e17caad69cec) Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9400d0eb682b2..908ebfa229333 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2842,9 +2842,9 @@ static void prepare_context_registration_info_v70(struct intel_context *ce, ce->parallel.guc.wqi_tail = 0; ce->parallel.guc.wqi_head = 0; - wq_desc_offset = i915_ggtt_offset(ce->state) + + wq_desc_offset = (u64)i915_ggtt_offset(ce->state) + __get_parent_scratch_offset(ce); - wq_base_offset = i915_ggtt_offset(ce->state) + + wq_base_offset = (u64)i915_ggtt_offset(ce->state) + __get_wq_offset(ce); info->wq_desc_lo = lower_32_bits(wq_desc_offset); info->wq_desc_hi = upper_32_bits(wq_desc_offset); From 6db9df4f7055eb4ea339e7b83ca676edd9ec1277 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 5 Sep 2024 09:24:18 +1000 Subject: [PATCH 03/25] drm/nouveau/fb: restore init() for ramgp102 init() was removed from ramgp102 when reworking the memory detection, as it was thought that the code was only necessary when the driver performs mclk changes, which nouveau doesn't support on pascal. However, it turns out that we still need to execute this on some GPUs to restore settings after DEVINIT, so revert to the original behaviour. v2: fix tags in commit message, cc stable Closes: https://gitlab.freedesktop.org/drm/nouveau/-/issues/319 Fixes: 2c0c15a22fa0 ("drm/nouveau/fb/gp102-ga100: switch to simpler vram size detection method") Cc: stable@vger.kernel.org # 6.6+ Signed-off-by: Ben Skeggs Signed-off-by: Danilo Krummrich Link: https://patchwork.freedesktop.org/patch/msgid/20240904232418.8590-1-bskeggs@nvidia.com --- drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 2 ++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index 50f0c1914f58e..4c3f743965798 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -46,6 +46,8 @@ u32 gm107_ram_probe_fbp(const struct nvkm_ram_func *, u32 gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32, struct nvkm_device *, int, int *); +int gp100_ram_init(struct nvkm_ram *); + /* RAM type-specific MR calculation routines */ int nvkm_sddr2_calc(struct nvkm_ram *); int nvkm_sddr3_calc(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c index 378f6fb709907..8987a21e81d17 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c @@ -27,7 +27,7 @@ #include #include -static int +int gp100_ram_init(struct nvkm_ram *ram) { struct nvkm_subdev *subdev = &ram->fb->subdev; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c index 8550f5e473474..b6b6ee59019d7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c @@ -5,6 +5,7 @@ static const struct nvkm_ram_func gp102_ram = { + .init = gp100_ram_init, }; int From 7b565174492699fed52c5e4ef1326f948a74b466 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 09:54:41 +0200 Subject: [PATCH 04/25] drm/stm: add COMMON_CLK dependency The added lvds driver and a change in the dsi driver resulted in failed builds when COMMON_CLK is disabled: x86_64-linux-ld: drivers/gpu/drm/stm/dw_mipi_dsi-stm.o: in function `dw_mipi_dsi_stm_remove': dw_mipi_dsi-stm.c:(.text+0x51e): undefined reference to `clk_hw_unregister' x86_64-linux-ld: drivers/gpu/drm/stm/lvds.o: in function `lvds_remove': lvds.c:(.text+0xe3): undefined reference to `of_clk_del_provider' x86_64-linux-ld: lvds.c:(.text+0xec): undefined reference to `clk_hw_unregister' x86_64-linux-ld: drivers/gpu/drm/stm/lvds.o: in function `lvds_pll_config': lvds.c:(.text+0xb5d): undefined reference to `clk_hw_get_rate' x86_64-linux-ld: drivers/gpu/drm/stm/lvds.o: in function `lvds_probe': lvds.c:(.text+0x1476): undefined reference to `clk_hw_register' x86_64-linux-ld: lvds.c:(.text+0x148b): undefined reference to `of_clk_hw_simple_get' x86_64-linux-ld: lvds.c:(.text+0x1493): undefined reference to `of_clk_add_hw_provider' x86_64-linux-ld: lvds.c:(.text+0x1535): undefined reference to `clk_hw_unregister' Add this as a dependency for the stm driver itself, since it will be required in practice anyway. Fixes: 185f99b61442 ("drm/stm: dsi: expose DSI PHY internal clock") Fixes: aca1cbc1c986 ("drm/stm: lvds: add new STM32 LVDS Display Interface Transmitter driver") Signed-off-by: Arnd Bergmann Link: https://patchwork.freedesktop.org/patch/msgid/20240719075454.3595358-1-arnd@kernel.org Signed-off-by: Raphael Gallais-Pou (cherry picked from commit 26dbffb2a4c4d4639c7b336f6b74a437c23dadd4) Signed-off-by: Maxime Ripard --- drivers/gpu/drm/stm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/stm/Kconfig b/drivers/gpu/drm/stm/Kconfig index 1cc6b6cbdfa94..d7f41a87808e8 100644 --- a/drivers/gpu/drm/stm/Kconfig +++ b/drivers/gpu/drm/stm/Kconfig @@ -2,6 +2,7 @@ config DRM_STM tristate "DRM Support for STMicroelectronics SoC Series" depends on DRM && (ARCH_STM32 || COMPILE_TEST) + depends on COMMON_CLK select DRM_KMS_HELPER select DRM_GEM_DMA_HELPER select DRM_PANEL_BRIDGE From 8c7c44be57672e1474bf15a451011c291e85fda4 Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Mon, 9 Sep 2024 20:53:59 +0000 Subject: [PATCH 05/25] drm/syncobj: Fix syncobj leak in drm_syncobj_eventfd_ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A syncobj reference is taken in drm_syncobj_find, but not released if eventfd_ctx_fdget or kzalloc fails. Put the reference in these error paths. Reported-by: Xingyu Jin Fixes: c7a472297169 ("drm/syncobj: add IOCTL to register an eventfd") Signed-off-by: T.J. Mercier Reviewed-by: Tvrtko Ursulin Reviewed-by. Christian König CC: stable@vger.kernel.org # 6.6+ Link: https://patchwork.freedesktop.org/patch/msgid/20240909205400.3498337-1-tjmercier@google.com Signed-off-by: Christian König --- drivers/gpu/drm/drm_syncobj.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index a0e94217b511a..4fcfc0b9b386c 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1464,6 +1464,7 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, struct drm_syncobj *syncobj; struct eventfd_ctx *ev_fd_ctx; struct syncobj_eventfd_entry *entry; + int ret; if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE)) return -EOPNOTSUPP; @@ -1479,13 +1480,15 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, return -ENOENT; ev_fd_ctx = eventfd_ctx_fdget(args->fd); - if (IS_ERR(ev_fd_ctx)) - return PTR_ERR(ev_fd_ctx); + if (IS_ERR(ev_fd_ctx)) { + ret = PTR_ERR(ev_fd_ctx); + goto err_fdget; + } entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) { - eventfd_ctx_put(ev_fd_ctx); - return -ENOMEM; + ret = -ENOMEM; + goto err_kzalloc; } entry->syncobj = syncobj; entry->ev_fd_ctx = ev_fd_ctx; @@ -1496,6 +1499,12 @@ drm_syncobj_eventfd_ioctl(struct drm_device *dev, void *data, drm_syncobj_put(syncobj); return 0; + +err_kzalloc: + eventfd_ctx_put(ev_fd_ctx); +err_fdget: + drm_syncobj_put(syncobj); + return ret; } int From d8c07bee1e636db7ee6ab64b958f7bfdd9ff8c1e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 09:46:59 +0800 Subject: [PATCH 06/25] drm/rockchip: Use iommu_paging_domain_alloc() Commit <421be3ee36a4> ("drm/rockchip: Refactor IOMMU initialisation") has refactored rockchip_drm_init_iommu() to pass a device that the domain is allocated for. Replace iommu_domain_alloc() with iommu_paging_domain_alloc() to retire the former. Signed-off-by: Lu Baolu Reviewed-by: Jason Gunthorpe Acked-by: Andy Yan Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20240902014700.66095-3-baolu.lu@linux.intel.com --- drivers/gpu/drm/rockchip/rockchip_drm_drv.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 44d769d9234d6..11e5d10de4d73 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -103,13 +103,17 @@ static int rockchip_drm_init_iommu(struct drm_device *drm_dev) struct rockchip_drm_private *private = drm_dev->dev_private; struct iommu_domain_geometry *geometry; u64 start, end; + int ret; if (IS_ERR_OR_NULL(private->iommu_dev)) return 0; - private->domain = iommu_domain_alloc(private->iommu_dev->bus); - if (!private->domain) - return -ENOMEM; + private->domain = iommu_paging_domain_alloc(private->iommu_dev); + if (IS_ERR(private->domain)) { + ret = PTR_ERR(private->domain); + private->domain = NULL; + return ret; + } geometry = &private->domain->geometry; start = geometry->aperture_start; From 45c690aea8ee5b7d012cd593bd288540a4bfdbf0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 2 Sep 2024 09:47:00 +0800 Subject: [PATCH 07/25] drm/tegra: Use iommu_paging_domain_alloc() Commit <17de3f5fdd35> ("iommu: Retire bus ops") removes iommu ops from the bus structure. The iommu subsystem no longer relies on bus for operations. So iommu_domain_alloc() interface is no longer relevant. Replace iommu_domain_alloc() with iommu_paging_domain_alloc() which takes the physical device from which the host1x_device virtual device was instantiated. This physical device is a common parent to all physical devices that are part of the virtual device. Suggested-by: Thierry Reding Signed-off-by: Lu Baolu Signed-off-by: Lyude Paul Link: https://patchwork.freedesktop.org/patch/msgid/20240902014700.66095-4-baolu.lu@linux.intel.com --- drivers/gpu/drm/tegra/drm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 03d1c76aec2d3..d79c76a287f22 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -1135,6 +1135,7 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev) static int host1x_drm_probe(struct host1x_device *dev) { + struct device *dma_dev = dev->dev.parent; struct tegra_drm *tegra; struct drm_device *drm; int err; @@ -1149,8 +1150,8 @@ static int host1x_drm_probe(struct host1x_device *dev) goto put; } - if (host1x_drm_wants_iommu(dev) && iommu_present(&platform_bus_type)) { - tegra->domain = iommu_domain_alloc(&platform_bus_type); + if (host1x_drm_wants_iommu(dev) && device_iommu_mapped(dma_dev)) { + tegra->domain = iommu_paging_domain_alloc(dma_dev); if (!tegra->domain) { err = -ENOMEM; goto free; From 02c132166993c8f8ca1b319665181f72f22ebc6b Mon Sep 17 00:00:00 2001 From: David Belanger Date: Fri, 23 Aug 2024 13:50:03 -0400 Subject: [PATCH 08/25] drm/amdkfd: Add cache line size info Populate cache line size info in topology based on information from IP discovery table. Signed-off-by: David Belanger Reviewed-by: Sreekant Somasekharan Signed-off-by: Alex Deucher (cherry picked from commit 4e9fadacddca96a2e6fcee9cc9488b78eb7a6953) --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cd7b81b7b939a..48caecf7e72ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED | CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); - pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_INST_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; + pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; i++; } /* GL1 Data Cache per SA */ @@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; i++; } /* L3 Data Cache per GPU */ @@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; + pcache_info[i].cache_line_size = 0; i++; } return i; From a7aeb03888b92304e2fc7d4d1c242f54a312561b Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 2 Sep 2024 11:40:26 +0200 Subject: [PATCH 09/25] drm/amd/display: Avoid race between dcn10_set_drr() and dc_state_destruct() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dc_state_destruct() nulls the resource context of the DC state. The pipe context passed to dcn10_set_drr() is a member of this resource context. If dc_state_destruct() is called parallel to the IRQ processing (which calls dcn10_set_drr() at some point), we can end up using already nulled function callback fields of struct stream_resource. The logic in dcn10_set_drr() already tries to avoid this, by checking tg against NULL. But if the nulling happens exactly after the NULL check and before the next access, then we get a race. Avoid this by copying tg first to a local variable, and then use this variable for all the operations. This should work, as long as nobody frees the resource pool where the timing generators live. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3142 Fixes: 06ad7e164256 ("drm/amd/display: Destroy DC context while keeping DML and DML2") Signed-off-by: Tobias Jakobi Tested-by: Raoul van Rüschen Tested-by: Christopher Snowhill Reviewed-by: Harry Wentland Tested-by: Sefa Eyeoglu Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit a3cc326a43bdc48fbdf53443e1027a03e309b643) Cc: stable@vger.kernel.org --- .../amd/display/dc/hwss/dcn10/dcn10_hwseq.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index 14a902ff3b8ac..1d2be574f6680 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -3207,15 +3207,19 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, * as well. */ for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } From e835d5144f5ef78e4f8828c63e2f0d61144f283a Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Mon, 2 Sep 2024 11:40:27 +0200 Subject: [PATCH 10/25] drm/amd/display: Avoid race between dcn35_set_drr() and dc_state_destruct() dc_state_destruct() nulls the resource context of the DC state. The pipe context passed to dcn35_set_drr() is a member of this resource context. If dc_state_destruct() is called parallel to the IRQ processing (which calls dcn35_set_drr() at some point), we can end up using already nulled function callback fields of struct stream_resource. The logic in dcn35_set_drr() already tries to avoid this, by checking tg against NULL. But if the nulling happens exactly after the NULL check and before the next access, then we get a race. Avoid this by copying tg first to a local variable, and then use this variable for all the operations. This should work, as long as nobody frees the resource pool where the timing generators live. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3142 Fixes: 06ad7e164256 ("drm/amd/display: Destroy DC context while keeping DML and DML2") Signed-off-by: Tobias Jakobi Reviewed-by: Harry Wentland Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 0607a50c004798a96e62c089a4c34c220179dcb5) Cc: stable@vger.kernel.org --- .../amd/display/dc/hwss/dcn35/dcn35_hwseq.c | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index f115c7a285e77..d5e9aec52a059 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -1462,7 +1462,13 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num; for (i = 0; i < num_pipes; i++) { - if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + /* dc_state_destruct() might null the stream resources, so fetch tg + * here first to avoid a race condition. The lifetime of the pointee + * itself (the timing_generator object) is not a problem here. + */ + struct timing_generator *tg = pipe_ctx[i]->stream_res.tg; + + if ((tg != NULL) && tg->funcs) { struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing; struct dc *dc = pipe_ctx[i]->stream->ctx->dc; @@ -1475,14 +1481,12 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx, num_frames = 2 * (frame_rate % 60); } } - if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, ¶ms); + if (tg->funcs->set_drr) + tg->funcs->set_drr(tg, ¶ms); if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if (tg->funcs->set_static_screen_control) + tg->funcs->set_static_screen_control( + tg, event_triggers, num_frames); } } } From c5b1a06697eae4d365691195ab2e0b45539bc2a9 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Thu, 5 Sep 2024 18:45:04 -0400 Subject: [PATCH 11/25] drm/amd/display: Do not reset planes based on crtc zpos_changed [Why] drm_normalize_zpos will set the crtc_state->zpos_changed to 1 if any of it's assigned planes changes zpos, or is removed/added from it. To have amdgpu_dm request a plane reset on this is too broad. For example, if only the cursor plane was moved from one crtc to another, the crtc's zpos_changed will be set to true. But that does not mean that the underlying primary plane requires a reset. [How] Narrow it down so that only the plane that has a change in zpos will require a reset. As a future TODO, we can further optimize this by only requiring a reset on z-order change. Z-order is different from z-pos, since a zpos change doesn't necessarily mean the z-ordering changed, and DC should only require a reset if the z-ordering changed. For example, the following zpos update does not change z-ordering: Plane A: zpos 2 -> 3 Plane B: zpos 1 -> 2 => Plane A is still on top of plane B: no reset needed Whereas this one does change z-ordering: Plane A: zpos 2 -> 1 Plane B: zpos 1 -> 2 => Plane A changed from on top, to below plane B: reset needed Fixes: 38e0c3df6dbd ("drm/amd/display: Move PRIMARY plane zpos higher") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3569 Signed-off-by: Leo Li Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher (cherry picked from commit 578aab4ecc73476393389440724b7a391cc0cea9) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 5942fc4e1c867..e6b2786fe707a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -10571,7 +10571,7 @@ static bool should_reset_plane(struct drm_atomic_state *state, * TODO: We can likely skip bandwidth validation if the only thing that * changed about the plane was it'z z-ordering. */ - if (new_crtc_state->zpos_changed) + if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos) return true; if (drm_atomic_crtc_needs_modeset(new_crtc_state)) From 612e3bacf8009e329342934ce403fd4ca553beba Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Thu, 5 Sep 2024 15:38:18 +0800 Subject: [PATCH 12/25] drm/amd/pm: update the features set on smu v14.0.2/3 update the features set on smu v14.0.2/3 Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit 25d48f2eb0af1f0e6f09f54a1a1716f48c0722c9) --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 11 ++++++++++- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index ac0dd6b97f8d5..e71a721c12b98 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -439,7 +439,16 @@ enum smu_clk_type { __SMU_DUMMY_MAP(BACO_CG), \ __SMU_DUMMY_MAP(SOC_CG), \ __SMU_DUMMY_MAP(LOW_POWER_DCNCLKS), \ - __SMU_DUMMY_MAP(WHISPER_MODE), + __SMU_DUMMY_MAP(WHISPER_MODE), \ + __SMU_DUMMY_MAP(EDC_PWRBRK), \ + __SMU_DUMMY_MAP(SOC_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_PSM_DIDT), \ + __SMU_DUMMY_MAP(APT_ALL_ENABLE), \ + __SMU_DUMMY_MAP(APT_SQ_THROTTLE), \ + __SMU_DUMMY_MAP(APT_PF_DCS), \ + __SMU_DUMMY_MAP(GFX_EDC_XVMIN), \ + __SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \ + __SMU_DUMMY_MAP(FAN_ABNORMAL), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(feature) SMU_FEATURE_##feature##_BIT diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 0c09b8c4ff493..514151e3e6661 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -187,6 +187,15 @@ static struct cmn2asic_mapping smu_v14_0_2_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(MEM_TEMP_READ), FEA_MAP(ATHUB_MMHUB_PG), FEA_MAP(SOC_PCC), + FEA_MAP(EDC_PWRBRK), + FEA_MAP(SOC_EDC_XVMIN), + FEA_MAP(GFX_PSM_DIDT), + FEA_MAP(APT_ALL_ENABLE), + FEA_MAP(APT_SQ_THROTTLE), + FEA_MAP(APT_PF_DCS), + FEA_MAP(GFX_EDC_XVMIN), + FEA_MAP(GFX_DIDT_XVMIN), + FEA_MAP(FAN_ABNORMAL), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, From 7a0982523cf3ff00f35b210fc3405c528a2ce7af Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 6 Sep 2024 20:46:54 +0800 Subject: [PATCH 13/25] drm/amd/pm: fix the pp_dpm_pcie issue on smu v14.0.2/3 fix the pp_dpm_pcie issue on smu v14.0.2/3 as below: 0: 2.5GT/s, x4 250Mhz 1: 8.0GT/s, x4 616Mhz * 2: 8.0GT/s, x4 1143Mhz * the middle level can be removed since it is always skipped on smu v14.0.2/3 Signed-off-by: Kenneth Feng Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit fedf6db3ea9dc5eda0b78cfbbb8f7a88b97e5b24) --- drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 514151e3e6661..2b45adecbed2e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -683,6 +683,9 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu) pcie_table->clk_freq[pcie_table->num_of_link_levels] = skutable->LclkFreq[link_level]; pcie_table->num_of_link_levels++; + + if (link_level == 0) + link_level++; } /* dcefclk dpm table setup */ From 3a23aa0b9c24c682c506121e2e0f6cbcfdae9076 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Fri, 16 Aug 2024 11:43:05 -0400 Subject: [PATCH 14/25] drm/amd/amdgpu: apply command submission parser for JPEG v2+ This patch extends the same cs parser from JPEG v4.0.3 to other JPEG versions (v2 and above). Rename to more common name as jpeg_v2_dec_ring_parse_cs() from jpeg_v4_0_3_dec_ring_parse_cs(). Acked-by: Alex Deucher Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher (cherry picked from commit 88dcad2d07c8d82e6a097c8e74239eb67333bcf7) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 63 +++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h | 6 +++ drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 + drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h | 1 - drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 57 +-------------------- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h | 7 +-- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 1 + drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 3 +- 10 files changed, 78 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 98aa3ccd0d202..41c0f8750dc1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" @@ -538,7 +539,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -764,6 +769,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_0_dec_ring_get_rptr, .get_wptr = jpeg_v2_0_dec_ring_get_wptr, .set_wptr = jpeg_v2_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -810,3 +816,58 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = { .rev = 0, .funcs = &jpeg_v2_0_ip_funcs, }; + +/** + * jpeg_v2_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || + reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h index 654e43e83e2c4..63fadda7a6733 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h @@ -45,6 +45,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -57,6 +60,9 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); +int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index d8ef95c847c2a..eedb9a829d950 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -662,6 +662,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -691,6 +692,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .get_rptr = jpeg_v2_5_dec_ring_get_rptr, .get_wptr = jpeg_v2_5_dec_ring_get_wptr, .set_wptr = jpeg_v2_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 31cfa3ce6528d..b1e7fd25afbcb 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -560,6 +560,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v3_0_dec_ring_get_rptr, .get_wptr = jpeg_v3_0_dec_ring_get_wptr, .set_wptr = jpeg_v3_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 3dac8f259d7fb..6c5c1a68a9b7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -727,6 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h index 07d36c2abd6bb..47638fd4d4e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h @@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block { }; extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block; - #endif /* __JPEG_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 6ae5a784e1874..b55041f38cec9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,9 +23,9 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" -#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "mmsch_v4_0_3.h" @@ -1089,7 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, - .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1254,56 +1254,3 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } - -/** - * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser - * - * @parser: Command submission parser context - * @job: the job to parse - * @ib: the IB to parse - * - * Parse the command stream, return -EINVAL for invalid packet, - * 0 otherwise - */ -int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib) -{ - uint32_t i, reg, res, cond, type; - struct amdgpu_device *adev = parser->adev; - - for (i = 0; i < ib->length_dw ; i += 2) { - reg = CP_PACKETJ_GET_REG(ib->ptr[i]); - res = CP_PACKETJ_GET_RES(ib->ptr[i]); - cond = CP_PACKETJ_GET_COND(ib->ptr[i]); - type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); - - if (res) /* only support 0 at the moment */ - return -EINVAL; - - switch (type) { - case PACKETJ_TYPE0: - if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE3: - if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - } - break; - case PACKETJ_TYPE6: - if (ib->ptr[i] == CP_PACKETJ_NOP) - continue; - dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); - return -EINVAL; - default: - dev_err(adev->dev, "Unknown packet type %d !\n", type); - return -EINVAL; - } - } - - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 71c54b294e157..747a3e5f68564 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,9 +46,6 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 -#define JPEG_REG_RANGE_START 0x4000 -#define JPEG_REG_RANGE_END 0x41c2 - extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -65,7 +62,5 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); -int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, - struct amdgpu_job *job, - struct amdgpu_ib *ib); + #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index f96ac6bce526d..44eeed445ea91 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -768,6 +768,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index f4daff90c7709..d662aa841f971 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -26,6 +26,7 @@ #include "amdgpu_pm.h" #include "soc15.h" #include "soc15d.h" +#include "jpeg_v2_0.h" #include "jpeg_v4_0_3.h" #include "vcn/vcn_5_0_0_offset.h" @@ -646,7 +647,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, - .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, + .parse_cs = jpeg_v2_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + From 8409fb50ce48d66cf9dc5391f03f05c56c430605 Mon Sep 17 00:00:00 2001 From: "David (Ming Qiang) Wu" Date: Thu, 5 Sep 2024 16:57:28 -0400 Subject: [PATCH 15/25] drm/amd/amdgpu: apply command submission parser for JPEG v1 Similar to jpeg_v2_dec_ring_parse_cs() but it has different register ranges and a few other registers access. Acked-by: Alex Deucher Signed-off-by: David (Ming Qiang) Wu Signed-off-by: Alex Deucher (cherry picked from commit 3d5adbdf1d01708777f2eda375227cbf7a98b9fe) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c | 76 +++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h | 11 ++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 71f43a5c7f721..6e0e88076224b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v1_0.h" @@ -34,6 +35,9 @@ static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring); +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val) { @@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4))); amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0)); @@ -554,6 +561,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { .get_rptr = jpeg_v1_0_decode_ring_get_rptr, .get_wptr = jpeg_v1_0_decode_ring_get_wptr, .set_wptr = jpeg_v1_0_decode_ring_set_wptr, + .parse_cs = jpeg_v1_dec_ring_parse_cs, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + @@ -611,3 +619,69 @@ static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); } + +/** + * jpeg_v1_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + u32 i, reg, res, cond, type; + int ret = 0; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */ + return -EINVAL; + + if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END) + continue; + + switch (type) { + case PACKETJ_TYPE0: + if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH && + reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW && + reg != JPEG_V1_REG_CTX_INDEX && + reg != JPEG_V1_REG_CTX_DATA) { + ret = -EINVAL; + } + break; + case PACKETJ_TYPE1: + if (reg != JPEG_V1_REG_CTX_DATA) + ret = -EINVAL; + break; + case PACKETJ_TYPE3: + if (reg != JPEG_V1_REG_SOFT_RESET) + ret = -EINVAL; + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] != CP_PACKETJ_NOP) + ret = -EINVAL; + break; + default: + ret = -EINVAL; + } + + if (ret) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + break; + } + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h index bbf33a6a39729..9654d22e03763 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h @@ -29,4 +29,15 @@ int jpeg_v1_0_sw_init(void *handle); void jpeg_v1_0_sw_fini(void *handle); void jpeg_v1_0_start(struct amdgpu_device *adev, int mode); +#define JPEG_V1_REG_RANGE_START 0x8000 +#define JPEG_V1_REG_RANGE_END 0x803f + +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238 +#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239 +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a +#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b +#define JPEG_V1_REG_CTX_INDEX 0x8328 +#define JPEG_V1_REG_CTX_DATA 0x8329 +#define JPEG_V1_REG_SOFT_RESET 0x83a0 + #endif /*__JPEG_V1_0_H__*/ From 17ea4383649fdeaff3181ddcf1ff03350d42e591 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 6 Sep 2024 10:42:45 -0400 Subject: [PATCH 16/25] drm/amdgpu/atomfirmware: Silence UBSAN warning Per the comments, these are variable sized arrays. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3613 Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher (cherry picked from commit 81f7804ba84ee617ed594de934ed87bcc4f83531) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/include/atomfirmware.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 09cbc3afd6d89..b0fc22383e287 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1038,7 +1038,7 @@ struct display_object_info_table_v1_4 uint16_t supporteddevices; uint8_t number_of_path; uint8_t reserved; - struct atom_display_object_path_v2 display_path[8]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path + struct atom_display_object_path_v2 display_path[]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path }; struct display_object_info_table_v1_5 { @@ -1048,7 +1048,7 @@ struct display_object_info_table_v1_5 { uint8_t reserved; // the real number of this included in the structure is calculated by using the // (whole structure size - the header size- number_of_path)/size of atom_display_object_path - struct atom_display_object_path_v3 display_path[8]; + struct atom_display_object_path_v3 display_path[]; }; /* From 2a2a865aee43dd6f1ddbb0581c2a2cc205beb768 Mon Sep 17 00:00:00 2001 From: Leo Li Date: Wed, 11 Sep 2024 09:06:50 -0400 Subject: [PATCH 17/25] drm/amd/display: Add all planes on CRTC to state for overlay cursor [Why] DC has a special commit path for native cursor, which use the built-in cursor pipe within DCN planes. This update path does not require all enabled planes to be added to the list of surface updates sent to DC. This is not the case for overlay cursor; it uses the same path as MPO commits. This update path requires all enabled planes to be added to the list of surface updates sent to DC. Otherwise, DC will disable planes not inside the list. [How] If overlay cursor is needed, add all planes on the same CRTC as this cursor to the atomic state. This is already done for non-cursor planes (MPO), just before the added lines. Fixes: 1b04dcca4fb1 ("drm/amd/display: Introduce overlay cursor mode") Closes: https://lore.kernel.org/lkml/f68020a3-c413-482d-beb2-5432d98a1d3e@amd.com Reviewed-by: Harry Wentland Signed-off-by: Leo Li Tested-by: Mikhail Gavrilov Signed-off-by: Alex Deucher (cherry picked from commit 0c8c5bdd7eaf291b6f727e98506fb68acee3a4cc) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e6b2786fe707a..1e069fa5211ee 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -11419,6 +11419,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, drm_dbg(dev, "Failed to determine cursor mode\n"); goto fail; } + + /* + * If overlay cursor is needed, DC cannot go through the + * native cursor update path. All enabled planes on the CRTC + * need to be added for DC to not disable a plane by mistake + */ + if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) { + ret = drm_atomic_add_affected_planes(state, crtc); + if (ret) + goto fail; + } } /* Remove exiting planes if they are modified */ From a2655358cbda38d53d4a443c7edc7951d91c3043 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 8 Jul 2024 10:33:01 -0700 Subject: [PATCH 18/25] drm/xe/gt: Remove double include The header generated/xe_wa_oob.h is included twice. Remove one. Fixes: 27cb2b7fec2a ("drm/xe/bmg: implement Wa_16023588340") Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202407052122.AzuWSPuo-lkp@intel.com/ Reviewed-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240708173301.1543871-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 3d122660dc70029d9cccb4e8670125f0affa959e) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index b8c73f69fbaf1..cb9df15e71376 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -9,7 +9,6 @@ #include #include -#include #include From 572239f7f15223e4cf22a8233677b9a080c3d873 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 23 Jul 2024 12:07:14 -0700 Subject: [PATCH 19/25] drm/xe: Remove fence check from send_tlb_invalidation 'fence' argument in send_tlb_invalidation cannot be NULL, remove non-NULL check from send_tlb_invalidation. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202407231049.esig0Fkb-lkp@intel.com/ Fixes: 58bfe6674467 ("drm/xe: Drop xe_gt_tlb_invalidation_wait") Signed-off-by: Matthew Brost Reviewed-by: Nirmoy Das Link: https://patchwork.freedesktop.org/patch/msgid/20240723190714.1744653-1-matthew.brost@intel.com Signed-off-by: Nirmoy Das (cherry picked from commit 6482253e6e1ad1c3a76645a3899d3cfdb5b918cb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 481d83d07367d..87cb76a8718c9 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -182,7 +182,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); - if (!ret && fence) { + if (!ret) { spin_lock_irq(>->tlb_invalidation.pending_lock); /* * We haven't actually published the TLB fence as per @@ -203,7 +203,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, tlb_timeout_jiffies(gt)); } spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else if (ret < 0 && fence) { + } else if (ret < 0) { __invalidation_fence_signal(xe, fence); } if (!ret) { From 5e2d1d4dc1c15da3e1e3dc09dc3c1276274439b2 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 24 Jul 2024 09:43:41 -0700 Subject: [PATCH 20/25] drm/xe: Fix possible UAF in guc_exec_queue_process_msg Store xe_device ahead of processing message as message can be free'd in some cases. v2: - Including missing local changes v3: - Resend for CI Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202407231445.rpisd1vA-lkp@intel.com/ Fixes: 55ea73aacfb9 ("drm/xe: Build PM into GuC CT layer") Signed-off-by: Matthew Brost Reviewed-by: Himal Prasad Ghimiray Link: https://patchwork.freedesktop.org/patch/msgid/20240724164341.1848954-1-matthew.brost@intel.com (cherry picked from commit 1a394b4f504f33eac8c38b6f42ba025105c7e869) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_guc_submit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 77b0f0d8f7297..59b36c7998c24 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1375,6 +1375,8 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg) static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) { + struct xe_device *xe = guc_to_xe(exec_queue_to_guc(msg->private_data)); + trace_xe_sched_msg_recv(msg); switch (msg->opcode) { @@ -1394,7 +1396,7 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) XE_WARN_ON("Unknown message type"); } - xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); + xe_pm_runtime_put(xe); } static const struct drm_sched_backend_ops drm_sched_ops = { From 062d59eb96b24990429c13706cb74ef09cec7e99 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Tue, 6 Aug 2024 13:07:22 +0200 Subject: [PATCH 21/25] drm/xe: Fix access_ok check in user_fence_create Check size of the data not size of the pointer. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202407300421.IBkAja96-lkp@intel.com/ Fixes: ddeb7989a98f ("drm/xe: Validate user fence during creation") Cc: Matthew Auld Cc: Matthew Brost Reviewed-by: Matthew Auld Reviewed-by: Tejas Upadhyay Reviewed-by: Apoorva Singh Link: https://patchwork.freedesktop.org/patch/msgid/20240806110722.28661-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das (cherry picked from commit e102b5ed6e283a144793cab8fcd95f61d0ddbadb) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_sync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index e8d31e0108601..80499681bd583 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -55,7 +55,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, struct xe_user_fence *ufence; u64 __user *ptr = u64_to_user_ptr(addr); - if (!access_ok(ptr, sizeof(ptr))) + if (!access_ok(ptr, sizeof(*ptr))) return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); From dd10595c3232d362f5a01e5d616434b2371ae8d4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Aug 2024 22:00:34 +0300 Subject: [PATCH 22/25] drm/xe/display: fix compat IS_DISPLAY_STEP() range end It's supposed to be an open range at the end like in i915. Fingers crossed that nobody relies on this definition. Fixes: 44e694958b95 ("drm/xe/display: Implement display support") Reviewed-by: Lucas De Marchi Reviewed-by: Matt Roper Acked-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/fe8743770694e429f6902491cdb306c97bdf701a.1724180287.git.jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 453afb1a439994deeacb8d9ecbb48c1f2348ea0a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h index 2feedddf1e400..1f1ad4d3ef517 100644 --- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h +++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h @@ -83,7 +83,7 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) #define HAS_GMD_ID(xe) GRAPHICS_VERx100(xe) >= 1270 /* Workarounds not handled yet */ -#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step <= last; }) +#define IS_DISPLAY_STEP(xe, first, last) ({u8 __step = (xe)->info.step.display; first <= __step && __step < last; }) #define IS_LP(xe) (0) #define IS_GEN9_LP(xe) (0) From a262cc8d554217fbe67e083159584beee3ea9b11 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 9 Sep 2024 09:59:33 -0700 Subject: [PATCH 23/25] drm/xe/oa: Enable Xe2+ PES disaggregation Enable Xe2+ PES disaggregation (for OAG) to retrieve disaggregated metrics when disaggregated data is needed. Userspace can select whether to receive aggregated or disaggregated metrics via the particular OA configuration it uses (programmed via DRM_XE_OBSERVATION_OP_ADD_CONFIG). Bspec: 61101 Fixes: e936f885f1e9 ("drm/xe/oa/uapi: Expose OA stream fd") Signed-off-by: Ashutosh Dixit Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240909165933.2638765-1-ashutosh.dixit@intel.com Cc: stable@vger.kernel.org (cherry picked from commit fb2551a0e93897aec7fb3d4f473ebc06b146d160) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 1 + drivers/gpu/drm/xe/xe_oa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index 1189f5a540a82..a9b0091cb7ee1 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -52,6 +52,7 @@ #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ #define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_PES_DISAG_EN REG_GENMASK(27, 22) #define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) #define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) #define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 6d69f751bf78d..22f14eba2c634 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -440,6 +440,10 @@ static void xe_oa_enable(struct xe_oa_stream *stream) val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; + if (GRAPHICS_VER(stream->oa->xe) >= 20 && + stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) + val |= OAG_OACONTROL_OA_PES_DISAG_EN; + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); } From 9bd7ff293fc84792514aeafa06c5a17f05cb5f4b Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:27 +0100 Subject: [PATCH 24/25] drm/xe/client: fix deadlock in show_meminfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a real deadlock as well as sleeping in atomic() bug in here, if the bo put happens to be the last ref, since bo destruction wants to grab the same spinlock and sleeping locks. Fix that by dropping the ref using xe_bo_put_deferred(), and moving the final commit outside of the lock. Dropping the lock around the put is tricky since the bo can go out of scope and delete itself from the list, making it difficult to navigate to the next list entry. Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/2727 Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Cc: # v6.8+ Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-5-matthew.auld@intel.com (cherry picked from commit 0083b8e6f11d7662283a267d4ce7c966812ffd8a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_drm_client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 7ddd59908334c..ec141357480f2 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -196,6 +196,7 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) struct xe_drm_client *client; struct drm_gem_object *obj; struct xe_bo *bo; + LLIST_HEAD(deferred); unsigned int id; u32 mem_type; @@ -215,11 +216,14 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) list_for_each_entry(bo, &client->bos_list, client_link) { if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; + bo_meminfo(bo, stats); - xe_bo_put(bo); + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock); + xe_bo_put_commit(&deferred); + for (mem_type = XE_PL_SYSTEM; mem_type < TTM_NUM_MEM_TYPES; ++mem_type) { if (!xe_mem_type_to_name[mem_type]) continue; From 94c4aa266111262c96c98f822d1bccc494786fee Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 11 Sep 2024 16:55:28 +0100 Subject: [PATCH 25/25] drm/xe/client: add missing bo locking in show_meminfo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bo_meminfo() wants to inspect bo state like tt and the ttm resource, however this state can change at any point leading to stuff like NPD and UAF, if the bo lock is not held. Grab the bo lock when calling bo_meminfo(), ensuring we drop any spinlocks first. In the case of object_idr we now also need to hold a ref. v2 (MattB) - Also add xe_bo_assert_held() Fixes: 0845233388f8 ("drm/xe: Implement fdinfo memory stats printing") Signed-off-by: Matthew Auld Cc: Himal Prasad Ghimiray Cc: Tejas Upadhyay Cc: "Thomas Hellström" Cc: # v6.8+ Reviewed-by: Matthew Brost Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20240911155527.178910-6-matthew.auld@intel.com (cherry picked from commit 4f63d712fa104c3ebefcb289d1e733e86d8698c7) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_drm_client.c | 39 +++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index ec141357480f2..1af95b9b91715 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -10,6 +10,7 @@ #include #include +#include "xe_assert.h" #include "xe_bo.h" #include "xe_bo_types.h" #include "xe_device_types.h" @@ -151,10 +152,13 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, */ void xe_drm_client_remove_bo(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); struct xe_drm_client *client = bo->client; + xe_assert(xe, !kref_read(&bo->ttm.base.refcount)); + spin_lock(&client->bos_lock); - list_del(&bo->client_link); + list_del_init(&bo->client_link); spin_unlock(&client->bos_lock); xe_drm_client_put(client); @@ -166,6 +170,8 @@ static void bo_meminfo(struct xe_bo *bo, u64 sz = bo->size; u32 mem_type; + xe_bo_assert_held(bo); + if (bo->placement.placement) mem_type = bo->placement.placement->mem_type; else @@ -207,7 +213,20 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) idr_for_each_entry(&file->object_idr, obj, id) { struct xe_bo *bo = gem_to_xe_bo(obj); - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + xe_bo_get(bo); + spin_unlock(&file->table_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + xe_bo_put(bo); + spin_lock(&file->table_lock); + } } spin_unlock(&file->table_lock); @@ -217,7 +236,21 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) if (!kref_get_unless_zero(&bo->ttm.base.refcount)) continue; - bo_meminfo(bo, stats); + if (dma_resv_trylock(bo->ttm.base.resv)) { + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + } else { + spin_unlock(&client->bos_lock); + + xe_bo_lock(bo, false); + bo_meminfo(bo, stats); + xe_bo_unlock(bo); + + spin_lock(&client->bos_lock); + /* The bo ref will prevent this bo from being removed from the list */ + xe_assert(xef->xe, !list_empty(&bo->client_link)); + } + xe_bo_put_deferred(bo, &deferred); } spin_unlock(&client->bos_lock);