diff --git a/Documentation/gpu/rfc/gpusvm.rst b/Documentation/gpu/rfc/gpusvm.rst index 073e46065d9c..bcf66a8137a6 100644 --- a/Documentation/gpu/rfc/gpusvm.rst +++ b/Documentation/gpu/rfc/gpusvm.rst @@ -67,14 +67,19 @@ Agreed upon design principles Overview of baseline design =========================== -Baseline design is simple as possible to get a working basline in which can be -built upon. - -.. kernel-doc:: drivers/gpu/drm/xe/drm_gpusvm.c +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Overview + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Locking - :doc: Migrataion + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c + :doc: Migration + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Partial Unmapping of Ranges + +.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c :doc: Examples Possible future design features diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index f314f5c4af0f..2451c816edd5 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -23,37 +23,42 @@ * DOC: Overview * * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM) - * - * The GPU SVM layer is a component of the DRM framework designed to manage shared - * virtual memory between the CPU and GPU. It enables efficient data exchange and - * processing for GPU-accelerated applications by allowing memory sharing and + * is a component of the DRM framework designed to manage shared virtual memory + * between the CPU and GPU. It enables efficient data exchange and processing + * for GPU-accelerated applications by allowing memory sharing and * synchronization between the CPU's and GPU's virtual address spaces. * * Key GPU SVM Components: - * - Notifiers: Notifiers: Used for tracking memory intervals and notifying the - * GPU of changes, notifiers are sized based on a GPU SVM - * initialization parameter, with a recommendation of 512M or - * larger. They maintain a Red-BlacK tree and a list of ranges that - * fall within the notifier interval. Notifiers are tracked within - * a GPU SVM Red-BlacK tree and list and are dynamically inserted - * or removed as ranges within the interval are created or - * destroyed. - * - Ranges: Represent memory ranges mapped in a DRM device and managed - * by GPU SVM. They are sized based on an array of chunk sizes, which - * is a GPU SVM initialization parameter, and the CPU address space. - * Upon GPU fault, the largest aligned chunk that fits within the - * faulting CPU address space is chosen for the range size. Ranges are - * expected to be dynamically allocated on GPU fault and removed on an - * MMU notifier UNMAP event. As mentioned above, ranges are tracked in - * a notifier's Red-Black tree. - * - Operations: Define the interface for driver-specific GPU SVM operations - * such as range allocation, notifier allocation, and - * invalidations. - * - Device Memory Allocations: Embedded structure containing enough information - * for GPU SVM to migrate to / from device memory. - * - Device Memory Operations: Define the interface for driver-specific device - * memory operations release memory, populate pfns, - * and copy to / from device memory. + * + * - Notifiers: + * Used for tracking memory intervals and notifying the GPU of changes, + * notifiers are sized based on a GPU SVM initialization parameter, with a + * recommendation of 512M or larger. They maintain a Red-BlacK tree and a + * list of ranges that fall within the notifier interval. Notifiers are + * tracked within a GPU SVM Red-BlacK tree and list and are dynamically + * inserted or removed as ranges within the interval are created or + * destroyed. + * - Ranges: + * Represent memory ranges mapped in a DRM device and managed by GPU SVM. + * They are sized based on an array of chunk sizes, which is a GPU SVM + * initialization parameter, and the CPU address space. Upon GPU fault, + * the largest aligned chunk that fits within the faulting CPU address + * space is chosen for the range size. Ranges are expected to be + * dynamically allocated on GPU fault and removed on an MMU notifier UNMAP + * event. As mentioned above, ranges are tracked in a notifier's Red-Black + * tree. + * + * - Operations: + * Define the interface for driver-specific GPU SVM operations such as + * range allocation, notifier allocation, and invalidations. + * + * - Device Memory Allocations: + * Embedded structure containing enough information for GPU SVM to migrate + * to / from device memory. + * + * - Device Memory Operations: + * Define the interface for driver-specific device memory operations + * release memory, populate pfns, and copy to / from device memory. * * This layer provides interfaces for allocating, mapping, migrating, and * releasing memory ranges between the CPU and GPU. It handles all core memory @@ -63,14 +68,18 @@ * below. * * Expected Driver Components: - * - GPU page fault handler: Used to create ranges and notifiers based on the - * fault address, optionally migrate the range to - * device memory, and create GPU bindings. - * - Garbage collector: Used to unmap and destroy GPU bindings for ranges. - * Ranges are expected to be added to the garbage collector - * upon a MMU_NOTIFY_UNMAP event in notifier callback. - * - Notifier callback: Used to invalidate and DMA unmap GPU bindings for - * ranges. + * + * - GPU page fault handler: + * Used to create ranges and notifiers based on the fault address, + * optionally migrate the range to device memory, and create GPU bindings. + * + * - Garbage collector: + * Used to unmap and destroy GPU bindings for ranges. Ranges are expected + * to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in + * notifier callback. + * + * - Notifier callback: + * Used to invalidate and DMA unmap GPU bindings for ranges. */ /** @@ -83,9 +92,9 @@ * range RB tree and list, as well as the range's DMA mappings and sequence * number. GPU SVM manages all necessary locking and unlocking operations, * except for the recheck range's pages being valid - * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings. This - * lock corresponds to the 'driver->update' lock mentioned in the HMM - * documentation (TODO: Link). Future revisions may transition from a GPU SVM + * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings. + * This lock corresponds to the ``driver->update`` lock mentioned in + * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM * global lock to a per-notifier lock if finer-grained locking is deemed * necessary. * @@ -102,11 +111,11 @@ * DOC: Migration * * The migration support is quite simple, allowing migration between RAM and - * device memory at the range granularity. For example, GPU SVM currently does not - * support mixing RAM and device memory pages within a range. This means that upon GPU - * fault, the entire range can be migrated to device memory, and upon CPU fault, the - * entire range is migrated to RAM. Mixed RAM and device memory storage within a range - * could be added in the future if required. + * device memory at the range granularity. For example, GPU SVM currently does + * not support mixing RAM and device memory pages within a range. This means + * that upon GPU fault, the entire range can be migrated to device memory, and + * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device + * memory storage within a range could be added in the future if required. * * The reasoning for only supporting range granularity is as follows: it * simplifies the implementation, and range sizes are driver-defined and should @@ -119,11 +128,11 @@ * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one * being that a subset of the range still has CPU and GPU mappings. If the - * backing store for the range is in device memory, a subset of the backing store has - * references. One option would be to split the range and device memory backing store, - * but the implementation for this would be quite complicated. Given that - * partial unmappings are rare and driver-defined range sizes are relatively - * small, GPU SVM does not support splitting of ranges. + * backing store for the range is in device memory, a subset of the backing + * store has references. One option would be to split the range and device + * memory backing store, but the implementation for this would be quite + * complicated. Given that partial unmappings are rare and driver-defined range + * sizes are relatively small, GPU SVM does not support splitting of ranges. * * With no support for range splitting, upon partial unmapping of a range, the * driver is expected to invalidate and destroy the entire range. If the range @@ -144,6 +153,8 @@ * * 1) GPU page fault handler * + * .. code-block:: c + * * int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range) * { * int err = 0; @@ -208,7 +219,9 @@ * return err; * } * - * 2) Garbage Collector. + * 2) Garbage Collector + * + * .. code-block:: c * * void __driver_garbage_collector(struct drm_gpusvm *gpusvm, * struct drm_gpusvm_range *range) @@ -231,7 +244,9 @@ * __driver_garbage_collector(gpusvm, range); * } * - * 3) Notifier callback. + * 3) Notifier callback + * + * .. code-block:: c * * void driver_invalidation(struct drm_gpusvm *gpusvm, * struct drm_gpusvm_notifier *notifier, @@ -499,7 +514,7 @@ drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni, return true; } -/** +/* * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM */ static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = { @@ -2055,7 +2070,6 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas, /** * drm_gpusvm_range_evict - Evict GPU SVM range - * @pagemap: Pointer to the GPU SVM structure * @range: Pointer to the GPU SVM range to be removed * * This function evicts the specified GPU SVM range. This function will not @@ -2146,8 +2160,8 @@ static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf) return err ? VM_FAULT_SIGBUS : 0; } -/** - * drm_gpusvm_pagemap_ops() - Device page map operations for GPU SVM +/* + * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM */ static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = { .page_free = drm_gpusvm_page_free, diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index b69896baa20c..d918ae1c8061 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -82,7 +82,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, struct i915_vma *vma, - u64 physical_alignment) + unsigned int alignment) { struct xe_device *xe = to_xe_device(fb->base.dev); struct xe_tile *tile0 = xe_device_get_root_tile(xe); @@ -108,7 +108,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_BO_FLAG_VRAM0 | XE_BO_FLAG_GGTT | XE_BO_FLAG_PAGETABLE, - physical_alignment); + alignment); else dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, dpt_size, ~0ull, @@ -116,7 +116,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_BO_FLAG_STOLEN | XE_BO_FLAG_GGTT | XE_BO_FLAG_PAGETABLE, - physical_alignment); + alignment); if (IS_ERR(dpt)) dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, dpt_size, ~0ull, @@ -124,7 +124,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT | XE_BO_FLAG_PAGETABLE, - physical_alignment); + alignment); if (IS_ERR(dpt)) return PTR_ERR(dpt); @@ -194,7 +194,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, struct i915_vma *vma, - u64 physical_alignment) + unsigned int alignment) { struct drm_gem_object *obj = intel_fb_bo(&fb->base); struct xe_bo *bo = gem_to_xe_bo(obj); @@ -277,7 +277,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, const struct i915_gtt_view *view, - u64 physical_alignment) + unsigned int alignment) { struct drm_device *dev = fb->base.dev; struct xe_device *xe = to_xe_device(dev); @@ -327,9 +327,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, vma->bo = bo; if (intel_fb_uses_dpt(&fb->base)) - ret = __xe_pin_fb_vma_dpt(fb, view, vma, physical_alignment); + ret = __xe_pin_fb_vma_dpt(fb, view, vma, alignment); else - ret = __xe_pin_fb_vma_ggtt(fb, view, vma, physical_alignment); + ret = __xe_pin_fb_vma_ggtt(fb, view, vma, alignment); if (ret) goto err_unpin; @@ -422,7 +422,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, struct i915_vma *vma; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane); - u64 phys_alignment = plane->min_alignment(plane, fb, 0); + unsigned int alignment = plane->min_alignment(plane, fb, 0); if (reuse_vma(new_plane_state, old_plane_state)) return 0; @@ -430,7 +430,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state, /* We reject creating !SCANOUT fb's, so this is weird.. */ drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT)); - vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, phys_alignment); + vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, alignment); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c index 36a3b5420fef..b0254b014fe4 100644 --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c @@ -320,7 +320,7 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test) count_rtp_entries++; xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries); - xe_rtp_process_to_sr(&ctx, param->entries, reg_sr); + xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, reg_sr); xa_for_each(®_sr->xa, idx, sre) { if (idx == param->expected_reg.addr) diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index bc1ff0a4e1e7..bc5714a5b36b 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -1496,14 +1496,6 @@ void xe_guc_stop(struct xe_guc *guc) int xe_guc_start(struct xe_guc *guc) { - if (!IS_SRIOV_VF(guc_to_xe(guc))) { - int err; - - err = xe_guc_pc_start(&guc->pc); - xe_gt_WARN(guc_to_gt(guc), err, "Failed to start GuC PC: %pe\n", - ERR_PTR(err)); - } - return xe_guc_submit_start(guc); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index fc447751fe78..223b95de388c 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -400,10 +400,9 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) }, - {} }; - xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc); } static void @@ -459,10 +458,9 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ, XE_RTP_ACTION_FLAG(ENGINE_BASE))) }, - {} }; - xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr); } static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance) diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c index 54d199b5cfb2..31dade91a089 100644 --- a/drivers/gpu/drm/xe/xe_mocs.c +++ b/drivers/gpu/drm/xe/xe_mocs.c @@ -781,7 +781,9 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p) flags = get_mocs_settings(xe, &table); xe_pm_runtime_get_noresume(xe); - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + fw_ref = xe_force_wake_get(gt_to_fw(gt), + flags & HAS_LNCF_MOCS ? + XE_FORCEWAKE_ALL : XE_FW_GT); if (!fw_ref) goto err_fw; diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index edab5d4e3ba5..23f6c81d9994 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -88,7 +88,6 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { RING_FORCE_TO_NONPRIV_ACCESS_RD | RING_FORCE_TO_NONPRIV_RANGE_4)) }, - {} }; static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) @@ -137,7 +136,8 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) { struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); - xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); + xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist), + &hwe->reg_whitelist); whitelist_apply_to_hwe(hwe); } diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index d2f604aa96fa..917fc16de866 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -90,11 +90,10 @@ static int emit_flush_dw(u32 *dw, int i) return i; } -static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb, - u32 *dw, int i) +static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 flags, u32 *dw, int i) { dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | - (invalidate_tlb ? MI_INVALIDATE_TLB : 0); + flags; dw[i++] = addr | MI_FLUSH_DW_USE_GTT; dw[i++] = 0; dw[i++] = value; @@ -111,16 +110,13 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i) return i; } -static int emit_flush_invalidate(u32 flag, u32 *dw, int i) +static int emit_flush_invalidate(u32 *dw, int i) { - dw[i] = MI_FLUSH_DW; - dw[i] |= flag; - dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW | - MI_FLUSH_DW_STORE_INDEX; - - dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | + MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX; + dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR; + dw[i++] = 0; dw[i++] = 0; - dw[i++] = ~0U; return i; } @@ -257,7 +253,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc if (job->ring_ops_flush_tlb) { dw[i++] = preparser_disable(true); i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, true, dw, i); + seqno, MI_INVALIDATE_TLB, dw, i); dw[i++] = preparser_disable(false); } else { i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), @@ -273,7 +269,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc dw, i); } - i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i); i = emit_user_interrupt(dw, i); @@ -319,7 +315,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, if (job->ring_ops_flush_tlb) i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc), - seqno, true, dw, i); + seqno, MI_INVALIDATE_TLB, dw, i); dw[i++] = preparser_disable(false); @@ -336,7 +332,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc, dw, i); } - i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i); + i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i); i = emit_user_interrupt(dw, i); @@ -413,7 +409,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job, if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) { /* XXX: Do we need this? Leaving for now. */ dw[i++] = preparser_disable(true); - i = emit_flush_invalidate(0, dw, i); + i = emit_flush_invalidate(dw, i); dw[i++] = preparser_disable(false); } diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 7a1c78fdfc92..13bb62d3e615 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -237,6 +237,7 @@ static void rtp_mark_active(struct xe_device *xe, * the save-restore argument. * @ctx: The context for processing the table, with one of device, gt or hwe * @entries: Table with RTP definitions + * @n_entries: Number of entries to process, usually ARRAY_SIZE(entries) * @sr: Save-restore struct where matching rules execute the action. This can be * viewed as the "coalesced view" of multiple the tables. The bits for each * register set are expected not to collide with previously added entries @@ -247,6 +248,7 @@ static void rtp_mark_active(struct xe_device *xe, */ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, + size_t n_entries, struct xe_reg_sr *sr) { const struct xe_rtp_entry_sr *entry; @@ -259,7 +261,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, if (IS_SRIOV_VF(xe)) return; - for (entry = entries; entry && entry->name; entry++) { + xe_assert(xe, entries); + + for (entry = entries; entry - entries < n_entries; entry++) { bool match = false; if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) { diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h index 38b9f13bba5e..4fe736a11c42 100644 --- a/drivers/gpu/drm/xe/xe_rtp.h +++ b/drivers/gpu/drm/xe/xe_rtp.h @@ -430,7 +430,7 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx, void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry_sr *entries, - struct xe_reg_sr *sr); + size_t n_entries, struct xe_reg_sr *sr); void xe_rtp_process(struct xe_rtp_process_ctx *ctx, const struct xe_rtp_entry *entries); diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c index 77bc958f5a42..49ddbda7cdef 100644 --- a/drivers/gpu/drm/xe/xe_tuning.c +++ b/drivers/gpu/drm/xe/xe_tuning.c @@ -85,8 +85,6 @@ static const struct xe_rtp_entry_sr gt_tunings[] = { XE_RTP_RULES(MEDIA_VERSION(2000)), XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN)) }, - - {} }; static const struct xe_rtp_entry_sr engine_tunings[] = { @@ -100,7 +98,6 @@ static const struct xe_rtp_entry_sr engine_tunings[] = { ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE)) }, - {} }; static const struct xe_rtp_entry_sr lrc_tunings[] = { @@ -138,8 +135,6 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = { XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK, REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f))) }, - - {} }; /** @@ -180,7 +175,7 @@ void xe_tuning_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings)); - xe_rtp_process_to_sr(&ctx, gt_tunings, >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt); @@ -191,7 +186,8 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.engine, ARRAY_SIZE(engine_tunings)); - xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings), + &hwe->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine); @@ -210,7 +206,7 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings)); - xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc); } void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 55eb453f4b1f..a25afb757f70 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -279,8 +279,6 @@ static const struct xe_rtp_entry_sr gt_was[] = { XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)), XE_RTP_ENTRY_FLAG(FOREACH_ENGINE), }, - - {} }; static const struct xe_rtp_entry_sr engine_was[] = { @@ -624,8 +622,6 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS)) }, - - {} }; static const struct xe_rtp_entry_sr lrc_was[] = { @@ -825,8 +821,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, - - {} }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { @@ -868,7 +862,7 @@ void xe_wa_process_gt(struct xe_gt *gt) xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt, ARRAY_SIZE(gt_was)); - xe_rtp_process_to_sr(&ctx, gt_was, >->reg_sr); + xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), >->reg_sr); } EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt); @@ -886,7 +880,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine, ARRAY_SIZE(engine_was)); - xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr); + xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), &hwe->reg_sr); } /** @@ -903,7 +897,7 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe) xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc, ARRAY_SIZE(lrc_was)); - xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc); + xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc); } /**