From 4b22f19cc7a9c671e0677db8bad2560d7ea2dba4 Mon Sep 17 00:00:00 2001
From: Rodrigo Vivi
Date: Thu, 6 Mar 2025 17:06:43 -0500
Subject: [PATCH 1/7] drm/xe/guc_pc: Remove duplicated pc_start call

xe_guc_pc_start() was getting called from both xe_uc_init_hw() and from
xe_guc_start(). But both are called from do_gt_restart() and only
xe_uc_init_hw() is called at initialization. So, let's remove the
duplication in the regular gt_restart path.

The only place where xe_guc_pc_start() won't get called now is on the
gt_reset failure path. However, if gt_reset has failed, it is really
unlikely that the PC start will work or is desired.

Cc: Vinay Belgaumkar
Reviewed-by: Jonathan Cavitt
Reviewed-by: Vinay Belgaumkar
Link: https://patchwork.freedesktop.org/patch/msgid/20250306220643.1014049-1-rodrigo.vivi@intel.com
Signed-off-by: Rodrigo Vivi
(cherry picked from commit fc858ddf9c68696537cec530d2d48bf6ed06ea92)
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/xe_guc.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index bc1ff0a4e1e7..bc5714a5b36b 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -1496,14 +1496,6 @@ void xe_guc_stop(struct xe_guc *guc)
 
 int xe_guc_start(struct xe_guc *guc)
 {
-	if (!IS_SRIOV_VF(guc_to_xe(guc))) {
-		int err;
-
-		err = xe_guc_pc_start(&guc->pc);
-		xe_gt_WARN(guc_to_gt(guc), err, "Failed to start GuC PC: %pe\n",
-			   ERR_PTR(err));
-	}
-
 	return xe_guc_submit_start(guc);
 }
 

From fd6c10e67b2986b68f0294cae584f873f7a2478c Mon Sep 17 00:00:00 2001
From: Lucas De Marchi
Date: Fri, 7 Mar 2025 17:14:28 -0800
Subject: [PATCH 2/7] drm/gpusvm: Fix kernel-doc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Due to a wrong `.. kernel-doc` directive in
Documentation/gpu/rfc/gpusvm.rst, the documentation was actually not
parsing anything from drivers/gpu/drm/drm_gpusvm.c. This fixes the
kernel-doc include and all warnings/errors created when doing so.

Cc: Simona Vetter
Cc: Dave Airlie
Cc: Christian König
Cc: dri-devel@lists.freedesktop.org
Cc: Matthew Brost
Cc: Thomas Hellström
Reported-by: Stephen Rothwell
Closes: https://lore.kernel.org/intel-xe/20250307195239.57abcd2d@canb.auug.org.au/
Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory")
Reviewed-by: Matthew Brost
Link: https://patchwork.freedesktop.org/patch/msgid/20250307-fix-svm-kerneldoc-v2-1-03c74b199620@intel.com
Signed-off-by: Lucas De Marchi
(cherry picked from commit 4da1fb61e02a783fdd7eb725ea03d897b8ef19ea)
Signed-off-by: Lucas De Marchi
---
 Documentation/gpu/rfc/gpusvm.rst |  15 ++--
 drivers/gpu/drm/drm_gpusvm.c     | 124 +++++++++++++++++--------------
 2 files changed, 79 insertions(+), 60 deletions(-)

diff --git a/Documentation/gpu/rfc/gpusvm.rst b/Documentation/gpu/rfc/gpusvm.rst
index 073e46065d9c..bcf66a8137a6 100644
--- a/Documentation/gpu/rfc/gpusvm.rst
+++ b/Documentation/gpu/rfc/gpusvm.rst
@@ -67,14 +67,19 @@ Agreed upon design principles
 Overview of baseline design
 ===========================
 
-Baseline design is simple as possible to get a working basline in which can be
-built upon.
-
-.. kernel-doc:: drivers/gpu/drm/xe/drm_gpusvm.c
+.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
    :doc: Overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
    :doc: Locking
-   :doc: Migrataion
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
+   :doc: Migration
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
    :doc: Partial Unmapping of Ranges
+
+.. kernel-doc:: drivers/gpu/drm/drm_gpusvm.c
    :doc: Examples
 
 Possible future design features

diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index f314f5c4af0f..2451c816edd5 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -23,37 +23,42 @@
  * DOC: Overview
  *
  * GPU Shared Virtual Memory (GPU SVM) layer for the Direct Rendering Manager (DRM)
- *
- * The GPU SVM layer is a component of the DRM framework designed to manage shared
- * virtual memory between the CPU and GPU. It enables efficient data exchange and
- * processing for GPU-accelerated applications by allowing memory sharing and
+ * is a component of the DRM framework designed to manage shared virtual memory
+ * between the CPU and GPU. It enables efficient data exchange and processing
+ * for GPU-accelerated applications by allowing memory sharing and
  * synchronization between the CPU's and GPU's virtual address spaces.
  *
  * Key GPU SVM Components:
- * - Notifiers: Notifiers: Used for tracking memory intervals and notifying the
- *              GPU of changes, notifiers are sized based on a GPU SVM
- *              initialization parameter, with a recommendation of 512M or
- *              larger. They maintain a Red-BlacK tree and a list of ranges that
- *              fall within the notifier interval. Notifiers are tracked within
- *              a GPU SVM Red-BlacK tree and list and are dynamically inserted
- *              or removed as ranges within the interval are created or
- *              destroyed.
- * - Ranges: Represent memory ranges mapped in a DRM device and managed
- *           by GPU SVM. They are sized based on an array of chunk sizes, which
- *           is a GPU SVM initialization parameter, and the CPU address space.
- *           Upon GPU fault, the largest aligned chunk that fits within the
- *           faulting CPU address space is chosen for the range size. Ranges are
- *           expected to be dynamically allocated on GPU fault and removed on an
- *           MMU notifier UNMAP event. As mentioned above, ranges are tracked in
- *           a notifier's Red-Black tree.
- * - Operations: Define the interface for driver-specific GPU SVM operations
- *               such as range allocation, notifier allocation, and
- *               invalidations.
- * - Device Memory Allocations: Embedded structure containing enough information
- *                              for GPU SVM to migrate to / from device memory.
- * - Device Memory Operations: Define the interface for driver-specific device
- *                             memory operations release memory, populate pfns,
- *                             and copy to / from device memory.
+ *
+ * - Notifiers:
+ *	Used for tracking memory intervals and notifying the GPU of changes,
+ *	notifiers are sized based on a GPU SVM initialization parameter, with a
+ *	recommendation of 512M or larger. They maintain a Red-BlacK tree and a
+ *	list of ranges that fall within the notifier interval. Notifiers are
+ *	tracked within a GPU SVM Red-BlacK tree and list and are dynamically
+ *	inserted or removed as ranges within the interval are created or
+ *	destroyed.
+ * - Ranges:
+ *	Represent memory ranges mapped in a DRM device and managed by GPU SVM.
+ *	They are sized based on an array of chunk sizes, which is a GPU SVM
+ *	initialization parameter, and the CPU address space. Upon GPU fault,
+ *	the largest aligned chunk that fits within the faulting CPU address
+ *	space is chosen for the range size. Ranges are expected to be
+ *	dynamically allocated on GPU fault and removed on an MMU notifier UNMAP
+ *	event. As mentioned above, ranges are tracked in a notifier's Red-Black
+ *	tree.
+ *
+ * - Operations:
+ *	Define the interface for driver-specific GPU SVM operations such as
+ *	range allocation, notifier allocation, and invalidations.
+ *
+ * - Device Memory Allocations:
+ *	Embedded structure containing enough information for GPU SVM to migrate
+ *	to / from device memory.
+ *
+ * - Device Memory Operations:
+ *	Define the interface for driver-specific device memory operations
+ *	release memory, populate pfns, and copy to / from device memory.
  *
  * This layer provides interfaces for allocating, mapping, migrating, and
  * releasing memory ranges between the CPU and GPU. It handles all core memory
@@ -63,14 +68,18 @@
  * below.
  *
  * Expected Driver Components:
- * - GPU page fault handler: Used to create ranges and notifiers based on the
- *                           fault address, optionally migrate the range to
- *                           device memory, and create GPU bindings.
- * - Garbage collector: Used to unmap and destroy GPU bindings for ranges.
- *                      Ranges are expected to be added to the garbage collector
- *                      upon a MMU_NOTIFY_UNMAP event in notifier callback.
- * - Notifier callback: Used to invalidate and DMA unmap GPU bindings for
- *                      ranges.
+ *
+ * - GPU page fault handler:
+ *	Used to create ranges and notifiers based on the fault address,
+ *	optionally migrate the range to device memory, and create GPU bindings.
+ *
+ * - Garbage collector:
+ *	Used to unmap and destroy GPU bindings for ranges. Ranges are expected
+ *	to be added to the garbage collector upon a MMU_NOTIFY_UNMAP event in
+ *	notifier callback.
+ *
+ * - Notifier callback:
+ *	Used to invalidate and DMA unmap GPU bindings for ranges.
  */
 
 /**
@@ -83,9 +92,9 @@
  * range RB tree and list, as well as the range's DMA mappings and sequence
  * number. GPU SVM manages all necessary locking and unlocking operations,
  * except for the recheck range's pages being valid
- * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings. This
- * lock corresponds to the 'driver->update' lock mentioned in the HMM
- * documentation (TODO: Link). Future revisions may transition from a GPU SVM
+ * (drm_gpusvm_range_pages_valid) when the driver is committing GPU bindings.
+ * This lock corresponds to the ``driver->update`` lock mentioned in
+ * Documentation/mm/hmm.rst. Future revisions may transition from a GPU SVM
  * global lock to a per-notifier lock if finer-grained locking is deemed
  * necessary.
 *
@@ -102,11 +111,11 @@
  * DOC: Migration
  *
  * The migration support is quite simple, allowing migration between RAM and
- * device memory at the range granularity. For example, GPU SVM currently does not
- * support mixing RAM and device memory pages within a range. This means that upon GPU
- * fault, the entire range can be migrated to device memory, and upon CPU fault, the
- * entire range is migrated to RAM. Mixed RAM and device memory storage within a range
- * could be added in the future if required.
+ * device memory at the range granularity. For example, GPU SVM currently does
+ * not support mixing RAM and device memory pages within a range. This means
+ * that upon GPU fault, the entire range can be migrated to device memory, and
+ * upon CPU fault, the entire range is migrated to RAM. Mixed RAM and device
+ * memory storage within a range could be added in the future if required.
  *
  * The reasoning for only supporting range granularity is as follows: it
  * simplifies the implementation, and range sizes are driver-defined and should
  *
@@ -119,11 +128,11 @@
  * Partial unmapping of ranges (e.g., 1M out of 2M is unmapped by CPU resulting
  * in MMU_NOTIFY_UNMAP event) presents several challenges, with the main one
  * being that a subset of the range still has CPU and GPU mappings. If the
- * backing store for the range is in device memory, a subset of the backing store has
- * references. One option would be to split the range and device memory backing store,
- * but the implementation for this would be quite complicated. Given that
- * partial unmappings are rare and driver-defined range sizes are relatively
- * small, GPU SVM does not support splitting of ranges.
+ * backing store for the range is in device memory, a subset of the backing
+ * store has references. One option would be to split the range and device
+ * memory backing store, but the implementation for this would be quite
+ * complicated. Given that partial unmappings are rare and driver-defined range
+ * sizes are relatively small, GPU SVM does not support splitting of ranges.
  *
  * With no support for range splitting, upon partial unmapping of a range, the
  * driver is expected to invalidate and destroy the entire range. If the range
@@ -144,6 +153,8 @@
  *
  * 1) GPU page fault handler
  *
+ * .. code-block:: c
+ *
  *	int driver_bind_range(struct drm_gpusvm *gpusvm, struct drm_gpusvm_range *range)
  *	{
  *		int err = 0;
@@ -208,7 +219,9 @@
  *		return err;
  *	}
  *
- * 2) Garbage Collector.
+ * 2) Garbage Collector
+ *
+ * .. code-block:: c
  *
  *	void __driver_garbage_collector(struct drm_gpusvm *gpusvm,
  *					struct drm_gpusvm_range *range)
@@ -231,7 +244,9 @@
  *		__driver_garbage_collector(gpusvm, range);
  *	}
  *
- * 3) Notifier callback.
+ * 3) Notifier callback
+ *
+ * .. code-block:: c
  *
  *	void driver_invalidation(struct drm_gpusvm *gpusvm,
  *				 struct drm_gpusvm_notifier *notifier,
@@ -499,7 +514,7 @@ drm_gpusvm_notifier_invalidate(struct mmu_interval_notifier *mni,
 	return true;
 }
 
-/**
+/*
  * drm_gpusvm_notifier_ops - MMU interval notifier operations for GPU SVM
  */
 static const struct mmu_interval_notifier_ops drm_gpusvm_notifier_ops = {
@@ -2055,7 +2070,6 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
 
 /**
  * drm_gpusvm_range_evict - Evict GPU SVM range
- * @pagemap: Pointer to the GPU SVM structure
  * @range: Pointer to the GPU SVM range to be removed
  *
  * This function evicts the specified GPU SVM range. This function will not
@@ -2146,8 +2160,8 @@ static vm_fault_t drm_gpusvm_migrate_to_ram(struct vm_fault *vmf)
 	return err ? VM_FAULT_SIGBUS : 0;
 }
 
-/**
- * drm_gpusvm_pagemap_ops() - Device page map operations for GPU SVM
+/*
+ * drm_gpusvm_pagemap_ops - Device page map operations for GPU SVM
  */
 static const struct dev_pagemap_ops drm_gpusvm_pagemap_ops = {
 	.page_free = drm_gpusvm_page_free,

From cedf23842d7433eb32cb782a637bb870fb096a3b Mon Sep 17 00:00:00 2001
From: Lucas De Marchi
Date: Thu, 6 Mar 2025 20:00:05 -0800
Subject: [PATCH 3/7] drm/xe/rtp: Drop sentinels from arg to
 xe_rtp_process_to_sr()

There's a mismatch in the API: while xe_rtp_process_to_sr() processes
entries until it finds an entry without a name, the active tracking with
xe_rtp_process_ctx_enable_active_tracking() needs to use the number of
elements. The number of elements is taken everywhere using ARRAY_SIZE(),
but that will count one entry too many.
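To make the off-by-one concrete, here is a stand-alone sketch of the two
counting conventions (illustrative entries only, not the driver tables
themselves):

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	struct entry { const char *name; };

	static const struct entry table[] = {
		{ .name = "Tuning: first" },
		{ .name = "Tuning: second" },
		{}	/* sentinel: nameless terminator */
	};

	int main(void)
	{
		size_t n = 0;

		/* iteration in the style of xe_rtp_process_to_sr(): stop at the sentinel */
		for (const struct entry *e = table; e->name; e++)
			n++;

		/* prints "2 vs 3": ARRAY_SIZE() also counts the sentinel */
		printf("%zu vs %zu\n", n, ARRAY_SIZE(table));
		return 0;
	}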
This leads to the following warning, as reported by lkp:

  drivers/gpu/drm/xe/xe_tuning.c: In function 'xe_tuning_dump':
  >> include/drm/drm_print.h:228:31: warning: '%s' directive argument is null [-Wformat-overflow=]
    228 |         drm_printf((printer), "%.*s" fmt, (indent), "\t\t\t\t\tX", ##__VA_ARGS__)
        |                               ^~~~~~
  drivers/gpu/drm/xe/xe_tuning.c:226:17: note: in expansion of macro 'drm_printf_indent'
    226 |                 drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);
        |                 ^~~~~~~~~~~~~~~~~

That's because it will still process the last entry when tracking the
active tunings. The same issue exists in the WAs.

Change xe_rtp_process_to_sr() to also take the number of elements so the
empty sentinel entry can be removed and the warning goes away. Fixing it
on the active-tracking side would be more fragile, as it would need a
`- 1` everywhere and would continue to use a different approach for the
number of elements.

Aside from the warning, it's a non-issue as there would always be enough
bits allocated and the last entry would never be active since
xe_rtp_process_to_sr() stops on the sentinel.

Reported-by: kernel test robot
Closes: https://lore.kernel.org/oe-kbuild-all/202503021906.P2MwAvyK-lkp@intel.com/
Cc: Tvrtko Ursulin
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20250306-fix-print-warning-v1-1-979c3dc03c0d@intel.com
Signed-off-by: Lucas De Marchi
(cherry picked from commit 8aa8c2d4214e1771c32101d70740002662d31bb7)
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/tests/xe_rtp_test.c |  2 +-
 drivers/gpu/drm/xe/xe_hw_engine.c      |  6 ++----
 drivers/gpu/drm/xe/xe_reg_whitelist.c  |  4 ++--
 drivers/gpu/drm/xe/xe_rtp.c            |  6 +++++-
 drivers/gpu/drm/xe/xe_rtp.h            |  2 +-
 drivers/gpu/drm/xe/xe_tuning.c         | 12 ++++--------
 drivers/gpu/drm/xe/xe_wa.c             | 12 +++---------
 7 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index 36a3b5420fef..b0254b014fe4 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -320,7 +320,7 @@ static void xe_rtp_process_to_sr_tests(struct kunit *test)
 			count_rtp_entries++;
 
 	xe_rtp_process_ctx_enable_active_tracking(&ctx, &active, count_rtp_entries);
-	xe_rtp_process_to_sr(&ctx, param->entries, reg_sr);
+	xe_rtp_process_to_sr(&ctx, param->entries, count_rtp_entries, reg_sr);
 
 	xa_for_each(&reg_sr->xa, idx, sre) {
 		if (idx == param->expected_reg.addr)
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index fc447751fe78..223b95de388c 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -400,10 +400,9 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
 				   PREEMPT_GPGPU_THREAD_GROUP_LEVEL)),
 		  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE)
 		},
-		{}
 	};
 
-	xe_rtp_process_to_sr(&ctx, lrc_setup, &hwe->reg_lrc);
+	xe_rtp_process_to_sr(&ctx, lrc_setup, ARRAY_SIZE(lrc_setup), &hwe->reg_lrc);
 }
 
 static void
@@ -459,10 +458,9 @@ hw_engine_setup_default_state(struct xe_hw_engine *hwe)
 		  XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), CS_PRIORITY_MEM_READ,
 				     XE_RTP_ACTION_FLAG(ENGINE_BASE)))
 		},
-		{}
 	};
 
-	xe_rtp_process_to_sr(&ctx, engine_entries, &hwe->reg_sr);
+	xe_rtp_process_to_sr(&ctx, engine_entries, ARRAY_SIZE(engine_entries), &hwe->reg_sr);
 }
 
 static const struct engine_info *find_engine_info(enum xe_engine_class class, int instance)
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index edab5d4e3ba5..23f6c81d9994 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -88,7 +88,6 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
 			       RING_FORCE_TO_NONPRIV_ACCESS_RD |
 			       RING_FORCE_TO_NONPRIV_RANGE_4))
 	},
-	{}
 };
 
 static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
@@ -137,7 +136,8 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe)
 {
 	struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe);
 
-	xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist);
+	xe_rtp_process_to_sr(&ctx, register_whitelist, ARRAY_SIZE(register_whitelist),
+			     &hwe->reg_whitelist);
 
 	whitelist_apply_to_hwe(hwe);
 }
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 7a1c78fdfc92..13bb62d3e615 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -237,6 +237,7 @@ static void rtp_mark_active(struct xe_device *xe,
  *	the save-restore argument.
  * @ctx: The context for processing the table, with one of device, gt or hwe
  * @entries: Table with RTP definitions
+ * @n_entries: Number of entries to process, usually ARRAY_SIZE(entries)
  * @sr: Save-restore struct where matching rules execute the action. This can be
  *	viewed as the "coalesced view" of multiple the tables. The bits for each
  *	register set are expected not to collide with previously added entries
@@ -247,6 +248,7 @@ static void rtp_mark_active(struct xe_device *xe,
  */
 void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
 			  const struct xe_rtp_entry_sr *entries,
+			  size_t n_entries,
 			  struct xe_reg_sr *sr)
 {
 	const struct xe_rtp_entry_sr *entry;
@@ -259,7 +261,9 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
 	if (IS_SRIOV_VF(xe))
 		return;
 
-	for (entry = entries; entry && entry->name; entry++) {
+	xe_assert(xe, entries);
+
+	for (entry = entries; entry - entries < n_entries; entry++) {
 		bool match = false;
 
 		if (entry->flags & XE_RTP_ENTRY_FLAG_FOREACH_ENGINE) {
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 38b9f13bba5e..4fe736a11c42 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -430,7 +430,7 @@ void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
 
 void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
 			  const struct xe_rtp_entry_sr *entries,
-			  struct xe_reg_sr *sr);
+			  size_t n_entries, struct xe_reg_sr *sr);
 
 void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
 		    const struct xe_rtp_entry *entries);
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 77bc958f5a42..49ddbda7cdef 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -85,8 +85,6 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
 	  XE_RTP_RULES(MEDIA_VERSION(2000)),
 	  XE_RTP_ACTIONS(SET(XE2LPM_SCRATCH3_LBCF, RWFLUSHALLEN))
 	},
-
-	{}
 };
 
 static const struct xe_rtp_entry_sr engine_tunings[] = {
@@ -100,7 +98,6 @@ static const struct xe_rtp_entry_sr engine_tunings[] = {
 			   ENGINE_CLASS(RENDER)),
 	  XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
 	},
-	{}
 };
 
 static const struct xe_rtp_entry_sr lrc_tunings[] = {
@@ -138,8 +135,6 @@ static const struct xe_rtp_entry_sr lrc_tunings[] = {
 	  XE_RTP_ACTIONS(FIELD_SET(FF_MODE, VS_HIT_MAX_VALUE_MASK,
 				   REG_FIELD_PREP(VS_HIT_MAX_VALUE_MASK, 0x3f)))
 	},
-
-	{}
 };
 
 /**
@@ -180,7 +175,7 @@ void xe_tuning_process_gt(struct xe_gt *gt)
 
 	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->tuning_active.gt,
 						  ARRAY_SIZE(gt_tunings));
-	xe_rtp_process_to_sr(&ctx, gt_tunings, &gt->reg_sr);
+	xe_rtp_process_to_sr(&ctx, gt_tunings, ARRAY_SIZE(gt_tunings), &gt->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_gt);
 
@@ -191,7 +186,8 @@ void xe_tuning_process_engine(struct xe_hw_engine *hwe)
 
 	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.engine,
 						  ARRAY_SIZE(engine_tunings));
-	xe_rtp_process_to_sr(&ctx, engine_tunings, &hwe->reg_sr);
+	xe_rtp_process_to_sr(&ctx, engine_tunings, ARRAY_SIZE(engine_tunings),
+			     &hwe->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_tuning_process_engine);
 
@@ -210,7 +206,7 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
 
 	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->tuning_active.lrc,
 						  ARRAY_SIZE(lrc_tunings));
-	xe_rtp_process_to_sr(&ctx, lrc_tunings, &hwe->reg_lrc);
+	xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc);
 }
 
 void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 55eb453f4b1f..a25afb757f70 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -279,8 +279,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
 	  XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), RAMDFTUNIT_CLKGATE_DIS)),
 	  XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
 	},
-
-	{}
 };
 
 static const struct xe_rtp_entry_sr engine_was[] = {
@@ -624,8 +622,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
 			FUNC(xe_rtp_match_first_render_or_compute)),
 	  XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
 	},
-
-	{}
 };
 
 static const struct xe_rtp_entry_sr lrc_was[] = {
@@ -825,8 +821,6 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
 				 DIS_PARTIAL_AUTOSTRIP |
 				 DIS_AUTOSTRIP))
 	},
-
-	{}
 };
 
 static __maybe_unused const struct xe_rtp_entry oob_was[] = {
@@ -868,7 +862,7 @@ void xe_wa_process_gt(struct xe_gt *gt)
 
 	xe_rtp_process_ctx_enable_active_tracking(&ctx, gt->wa_active.gt,
 						  ARRAY_SIZE(gt_was));
-	xe_rtp_process_to_sr(&ctx, gt_was, &gt->reg_sr);
+	xe_rtp_process_to_sr(&ctx, gt_was, ARRAY_SIZE(gt_was), &gt->reg_sr);
 }
 EXPORT_SYMBOL_IF_KUNIT(xe_wa_process_gt);
 
@@ -886,7 +880,7 @@ void xe_wa_process_engine(struct xe_hw_engine *hwe)
 
 	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.engine,
 						  ARRAY_SIZE(engine_was));
-	xe_rtp_process_to_sr(&ctx, engine_was, &hwe->reg_sr);
+	xe_rtp_process_to_sr(&ctx, engine_was, ARRAY_SIZE(engine_was), &hwe->reg_sr);
 }
 
 /**
@@ -903,7 +897,7 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)
 
 	xe_rtp_process_ctx_enable_active_tracking(&ctx, hwe->gt->wa_active.lrc,
 						  ARRAY_SIZE(lrc_was));
-	xe_rtp_process_to_sr(&ctx, lrc_was, &hwe->reg_lrc);
+	xe_rtp_process_to_sr(&ctx, lrc_was, ARRAY_SIZE(lrc_was), &hwe->reg_lrc);
 }
 
 /**

From 298661cd9cea55233cf60dee3ef9f736ddd1db7a Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Fri, 7 Mar 2025 11:13:59 +0000
Subject: [PATCH 4/7] drm/xe: Fix MOCS debugfs LNCF readout

With only XE_FW_GT taken, the LNCF registers read back as all zeroes,
leading to a wild goose chase trying to figure out why the register
programming is incorrect.

Fix it by grabbing XE_FORCEWAKE_ALL for affected platforms.
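The shape of the fix (a sketch; the authoritative change is in the diff
below) is to derive the forcewake domain from the MOCS flags before
touching any registers, and to drop the reference afterwards as usual:

	unsigned int fw_ref;

	/* On affected platforms the LNCF registers sit outside the GT domain */
	fw_ref = xe_force_wake_get(gt_to_fw(gt),
				   flags & HAS_LNCF_MOCS ?
				   XE_FORCEWAKE_ALL : XE_FW_GT);
	if (!fw_ref)
		goto err_fw;

	/* ... read and dump the MOCS (and, if present, LNCF) registers ... */

	xe_force_wake_put(gt_to_fw(gt), fw_ref);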
Signed-off-by: Tvrtko Ursulin
Reviewed-by: Rodrigo Vivi
Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-2-tvrtko.ursulin@igalia.com
Signed-off-by: Rodrigo Vivi
(cherry picked from commit 1182bc74b39ba3d124b544dab22d5672fae54b67)
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/xe_mocs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 54d199b5cfb2..31dade91a089 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -781,7 +781,9 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
 	flags = get_mocs_settings(xe, &table);
 
 	xe_pm_runtime_get_noresume(xe);
-	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+	fw_ref = xe_force_wake_get(gt_to_fw(gt),
+				   flags & HAS_LNCF_MOCS ?
+				   XE_FORCEWAKE_ALL : XE_FW_GT);
 
 	if (!fw_ref)
 		goto err_fw;

From e2a0a6328ea7385db00c3d4f3067ded9bbb709a1 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Fri, 7 Mar 2025 11:14:00 +0000
Subject: [PATCH 5/7] drm/xe: Fix ring flush invalidation

emit_flush_invalidate() is incorrectly marking the write to LRC_PPHWSP
as a GGTT write and also writing an atypical ~0 dword as the payload.
Fix it.

While at it, drop the unused flags argument.

Signed-off-by: Tvrtko Ursulin
Reviewed-by: Matt Roper
Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-3-tvrtko.ursulin@igalia.com
Signed-off-by: Rodrigo Vivi
(cherry picked from commit 08ea901d0b8f6ea261d9936e03fa690540af0126)
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index d2f604aa96fa..3d1b4d3d788f 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -111,16 +111,13 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
 	return i;
 }
 
-static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
+static int emit_flush_invalidate(u32 *dw, int i)
 {
-	dw[i] = MI_FLUSH_DW;
-	dw[i] |= flag;
-	dw[i++] |= MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
-		MI_FLUSH_DW_STORE_INDEX;
-
-	dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
+	dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
+		  MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX;
+	dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR;
+	dw[i++] = 0;
 	dw[i++] = 0;
-	dw[i++] = ~0U;
 
 	return i;
 }
@@ -413,7 +410,7 @@ static void emit_migration_job_gen12(struct xe_sched_job *job,
 	if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
 		/* XXX: Do we need this? Leaving for now. */
 		dw[i++] = preparser_disable(true);
-		i = emit_flush_invalidate(0, dw, i);
+		i = emit_flush_invalidate(dw, i);
 		dw[i++] = preparser_disable(false);
 	}
 

From 11ef40eb57322322139de460f6370aec38da5a45 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Fri, 7 Mar 2025 11:14:01 +0000
Subject: [PATCH 6/7] drm/xe: Pass flags directly to emit_flush_imm_ggtt

This is more readable than the nameless booleans and will also come in
handy later.
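At a call site the difference reads like this (sketch):

	/* before: a bare boolean whose meaning needs a trip to the callee */
	i = emit_flush_imm_ggtt(addr, seqno, true, dw, i);

	/* after: the flag spells out what is requested */
	i = emit_flush_imm_ggtt(addr, seqno, MI_INVALIDATE_TLB, dw, i);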
Signed-off-by: Tvrtko Ursulin
Reviewed-by: Matt Roper
Reviewed-by: Tejas Upadhyay
Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-4-tvrtko.ursulin@igalia.com
Signed-off-by: Rodrigo Vivi
(cherry picked from commit 52a237e8d6c4abcda40c71268ee6cec75aa62799)
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/xe_ring_ops.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index 3d1b4d3d788f..917fc16de866 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -90,11 +90,10 @@ static int emit_flush_dw(u32 *dw, int i)
 	return i;
 }
 
-static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
-			       u32 *dw, int i)
+static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 flags, u32 *dw, int i)
 {
 	dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW |
-		(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
+		  flags;
 	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
 	dw[i++] = 0;
 	dw[i++] = value;
@@ -254,7 +253,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 	if (job->ring_ops_flush_tlb) {
 		dw[i++] = preparser_disable(true);
 		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-					seqno, true, dw, i);
+					seqno, MI_INVALIDATE_TLB, dw, i);
 		dw[i++] = preparser_disable(false);
 	} else {
 		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
@@ -270,7 +269,7 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 					dw, i);
 	}
 
-	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i);
 
 	i = emit_user_interrupt(dw, i);
 
@@ -316,7 +315,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 
 	if (job->ring_ops_flush_tlb)
 		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-					seqno, true, dw, i);
+					seqno, MI_INVALIDATE_TLB, dw, i);
 
 	dw[i++] = preparser_disable(false);
 
@@ -333,7 +332,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 					dw, i);
 	}
 
-	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, 0, dw, i);
 
 	i = emit_user_interrupt(dw, i);
 

From 7b7b07c285c304317d00ea21c2a659167d4d4d12 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Fri, 7 Mar 2025 11:14:02 +0000
Subject: [PATCH 7/7] drm/xe: Use correct type width for alignment in fb
 pinning code

plane->min_alignment() returns an unsigned int, so let's use that type
in the whole relevant call chain.
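The change at the top of the chain (sketch; the full diff follows):

	/* before: needlessly widened to u64 on its way down */
	u64 phys_alignment = plane->min_alignment(plane, fb, 0);

	/* after: matches the unsigned int that min_alignment() returns */
	unsigned int alignment = plane->min_alignment(plane, fb, 0);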
Signed-off-by: Tvrtko Ursulin
Reviewed-by: Rodrigo Vivi
Link: https://patchwork.freedesktop.org/patch/msgid/20250307111402.26577-5-tvrtko.ursulin@igalia.com
Signed-off-by: Rodrigo Vivi
(cherry picked from commit c36e3442ea1c4c63f9876486dd9091487a77c5f2)
Signed-off-by: Lucas De Marchi
---
 drivers/gpu/drm/xe/display/xe_fb_pin.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index b69896baa20c..d918ae1c8061 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -82,7 +82,7 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
 static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
 			       const struct i915_gtt_view *view,
 			       struct i915_vma *vma,
-			       u64 physical_alignment)
+			       unsigned int alignment)
 {
 	struct xe_device *xe = to_xe_device(fb->base.dev);
 	struct xe_tile *tile0 = xe_device_get_root_tile(xe);
@@ -108,7 +108,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
 						 XE_BO_FLAG_VRAM0 |
 						 XE_BO_FLAG_GGTT |
 						 XE_BO_FLAG_PAGETABLE,
-						 physical_alignment);
+						 alignment);
 	else
 		dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, dpt_size, ~0ull,
 						      ttm_bo_type_kernel,
@@ -116,7 +116,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
 						      XE_BO_FLAG_STOLEN |
 						      XE_BO_FLAG_GGTT |
 						      XE_BO_FLAG_PAGETABLE,
-						      physical_alignment);
+						      alignment);
 	if (IS_ERR(dpt))
 		dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL, dpt_size, ~0ull,
 						      ttm_bo_type_kernel,
@@ -124,7 +124,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
 						      XE_BO_FLAG_SYSTEM |
 						      XE_BO_FLAG_GGTT |
 						      XE_BO_FLAG_PAGETABLE,
-						      physical_alignment);
+						      alignment);
 	if (IS_ERR(dpt))
 		return PTR_ERR(dpt);
 
@@ -194,7 +194,7 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
 static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
 				const struct i915_gtt_view *view,
 				struct i915_vma *vma,
-				u64 physical_alignment)
+				unsigned int alignment)
 {
 	struct drm_gem_object *obj = intel_fb_bo(&fb->base);
 	struct xe_bo *bo = gem_to_xe_bo(obj);
@@ -277,7 +277,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
 
 static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
 					const struct i915_gtt_view *view,
-					u64 physical_alignment)
+					unsigned int alignment)
 {
 	struct drm_device *dev = fb->base.dev;
 	struct xe_device *xe = to_xe_device(dev);
@@ -327,9 +327,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
 	vma->bo = bo;
 
 	if (intel_fb_uses_dpt(&fb->base))
-		ret = __xe_pin_fb_vma_dpt(fb, view, vma, physical_alignment);
+		ret = __xe_pin_fb_vma_dpt(fb, view, vma, alignment);
 	else
-		ret = __xe_pin_fb_vma_ggtt(fb, view, vma, physical_alignment);
+		ret = __xe_pin_fb_vma_ggtt(fb, view, vma, alignment);
 	if (ret)
 		goto err_unpin;
 
@@ -422,7 +422,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state,
 	struct i915_vma *vma;
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 	struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane);
-	u64 phys_alignment = plane->min_alignment(plane, fb, 0);
+	unsigned int alignment = plane->min_alignment(plane, fb, 0);
 
 	if (reuse_vma(new_plane_state, old_plane_state))
 		return 0;
@@ -430,7 +430,7 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state,
 
 	/* We reject creating !SCANOUT fb's, so this is weird.. */
 	drm_WARN_ON(bo->ttm.base.dev, !(bo->flags & XE_BO_FLAG_SCANOUT));
 
-	vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, phys_alignment);
+	vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, alignment);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
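With the above applied, the alignment keeps a single type from the plane
hook down to the pin helpers (an abridged sketch of the call chain, not
a verbatim quote of the code):

	unsigned int alignment = plane->min_alignment(plane, fb, 0);

	vma = __xe_pin_fb_vma(intel_fb, &new_plane_state->view.gtt, alignment);
	/*
	 * __xe_pin_fb_vma() then forwards the same unsigned int to either
	 * __xe_pin_fb_vma_dpt() or __xe_pin_fb_vma_ggtt().
	 */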