Skip to content

Commit

Permalink
drm/xe: Enable Fixed CCS mode setting
Browse files Browse the repository at this point in the history
Disable dynamic HW load balancing of compute resource assignment
to engines and instead enable fixed mode of mapping compute
resources to engines on all platforms with more than one compute
engine.

By default enable only one CCS engine with all compute slices
assigned to it. This is the desired configuration for common
workloads.

PVC platform supports only the fixed CCS mode (workaround 16016805146).

v2: Rebase, make it platform agnostic
v3: Minor code refactoring

Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
  • Loading branch information
Niranjana Vishwanathapura authored and Rodrigo Vivi committed Dec 21, 2023
1 parent b279b53 commit 0d97ecc
Show file tree
Hide file tree
Showing 9 changed files with 159 additions and 0 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/xe/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ xe-y += xe_bb.o \
xe_gsc.o \
xe_gsc_submit.o \
xe_gt.o \
xe_gt_ccs_mode.o \
xe_gt_clock.o \
xe_gt_debugfs.o \
xe_gt_idle.o \
Expand Down
14 changes: 14 additions & 0 deletions drivers/gpu/drm/xe/regs/xe_gt_regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,22 @@
#define COMP_CKN_IN REG_GENMASK(30, 29)

/* RCU_MODE is declared as a masked register (XE_REG_OPTION_MASKED) */
#define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED)
/* Bit 1: use the fixed slice CCS mode */
#define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
/* Bit 0: presumably enables the CCS engines - confirm against the hardware spec */
#define RCU_MODE_CCS_ENABLE REG_BIT(0)

/*
 * CCS_MODE maps compute slices (cslices) to compute engines (CCS).
 *
 * The macros below pack four cslice fields of CCS_MODE_CSLICE_WIDTH (3) bits
 * each into bits 11:0 of the register. A field with all of its bits set
 * (CCS_MODE_CSLICE_MASK) is the value used for a slice that is left disabled.
 *
 * Layout as originally documented:
 * Total of 4 cslices, where each cslice is in the form:
 * [0-3] CCS ID
 * [4-6] RSVD
 * [7] Disabled
 *
 * NOTE(review): the layout above describes an 8-bit field per cslice, which
 * does not match the 3-bit fields defined below - confirm against the
 * hardware spec.
 */
#define CCS_MODE XE_REG(0x14804)
#define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */
#define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */
#define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1)
/* Shift the value @ccs into the field belonging to slice number @cslice */
#define CCS_MODE_CSLICE(cslice, ccs) \
((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))

#define FORCEWAKE_ACK_GT XE_REG(0x130044)
#define FORCEWAKE_KERNEL BIT(0)
#define FORCEWAKE_USER BIT(1)
Expand Down
10 changes: 10 additions & 0 deletions drivers/gpu/drm/xe/xe_gt.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gsc.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_clock.h"
#include "xe_gt_idle.h"
#include "xe_gt_mcr.h"
Expand Down Expand Up @@ -450,6 +451,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (err)
goto err_force_wake;

/* Configure default CCS mode of 1 engine with all resources */
if (xe_gt_ccs_mode_enabled(gt)) {
gt->ccs_mode = 1;
xe_gt_apply_ccs_mode(gt);
}

err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
XE_WARN_ON(err);
xe_device_mem_access_put(gt_to_xe(gt));
Expand Down Expand Up @@ -560,6 +567,9 @@ static int do_gt_restart(struct xe_gt *gt)
xe_reg_sr_apply_whitelist(hwe);
}

/* Get CCS mode in sync between sw/hw */
xe_gt_apply_ccs_mode(gt);

return 0;
}

Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/xe/xe_gt.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
xe_hw_engine_is_valid((hwe__)))

/*
 * Compute-engine bits of the GT's engine mask, shifted down by
 * XE_HW_ENGINE_CCS0 so that the bit at position XE_HW_ENGINE_CCS0 lands in
 * bit 0 of the result.
 */
#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)

#ifdef CONFIG_FAULT_INJECTION
extern struct fault_attr gt_reset_failure;
static inline bool xe_fault_inject_gt_reset(void)
Expand Down
78 changes: 78 additions & 0 deletions drivers/gpu/drm/xe/xe_gt_ccs_mode.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2023 Intel Corporation
*/

#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_mmio.h"

/*
 * Program CCS_MODE so that the available compute slices of @gt are handed out
 * in turn to the first @num_engines compute engines (by logical instance).
 *
 * @gt: GT whose CCS_MODE register is written
 * @num_engines: number of compute engines to enable; asserted to be non-zero,
 *               not larger than the number of available slices, and an exact
 *               divisor of that number
 */
static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
{
	struct xe_device *xe = gt_to_xe(gt);
	int num_slices = hweight32(CCS_MASK(gt));
	/* Start from all cslice fields set, i.e. every slice disabled */
	u32 mode = CCS_MODE_CSLICE_0_3_MASK;
	u32 config = 0;
	int slice = 0;
	int passes, pass;

	xe_assert(xe, xe_gt_ccs_mode_enabled(gt));

	xe_assert(xe, num_engines && num_engines <= num_slices);
	xe_assert(xe, !(num_slices % num_engines));

	/*
	 * Walk the available slices in order and give each one to a user
	 * engine, cycling through the enabled engines once per pass. With
	 * four compute slices available the result is:
	 *
	 * 1 engine (ccs0):
	 *	slices 0, 1, 2, 3 -> ccs0
	 *
	 * 2 engines (ccs0, ccs1):
	 *	slices 0, 2 -> ccs0
	 *	slices 1, 3 -> ccs1
	 *
	 * 4 engines (ccs0, ccs1, ccs2, ccs3):
	 *	slice 0 -> ccs0
	 *	slice 1 -> ccs1
	 *	slice 2 -> ccs2
	 *	slice 3 -> ccs3
	 */
	passes = num_slices / num_engines;
	for (pass = 0; pass < passes; pass++) {
		struct xe_hw_engine *hwe;
		enum xe_hw_engine_id id;

		for_each_hw_engine(hwe, gt, id) {
			if (hwe->class != XE_ENGINE_CLASS_COMPUTE)
				continue;

			/* Only the first num_engines logical instances take part */
			if (hwe->logical_instance >= num_engines)
				break;

			/* A fused-off slice is skipped and stays disabled */
			while ((CCS_MASK(gt) & BIT(slice)) == 0)
				slice++;

			/* Replace this slice's "disabled" field with the engine instance */
			mode = (mode & ~CCS_MODE_CSLICE(slice, CCS_MODE_CSLICE_MASK)) |
			       CCS_MODE_CSLICE(slice, hwe->instance);
			slice++;

			/* Engine bitmap, only reported in the log message below */
			config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;
		}
	}

	xe_mmio_write32(gt, CCS_MODE, mode);

	xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
		   mode, config, num_engines, num_slices);
}

/*
 * Write the GT's current software CCS mode (gt->ccs_mode) to hardware.
 * Does nothing when gt->ccs_mode is zero.
 */
void xe_gt_apply_ccs_mode(struct xe_gt *gt)
{
	if (gt->ccs_mode)
		__xe_gt_apply_ccs_mode(gt, gt->ccs_mode);
}
23 changes: 23 additions & 0 deletions drivers/gpu/drm/xe/xe_gt_ccs_mode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2023 Intel Corporation
*/

#ifndef _XE_GT_CCS_MODE_H_
#define _XE_GT_CCS_MODE_H_

#include "xe_device_types.h"
#include "xe_gt.h"
#include "xe_gt_types.h"
#include "xe_platform_types.h"

void xe_gt_apply_ccs_mode(struct xe_gt *gt);

/*
 * Report whether CCS mode handling applies to @gt, which is the case when
 * more than one compute engine is present in CCS_MASK(gt).
 */
static inline bool xe_gt_ccs_mode_enabled(const struct xe_gt *gt)
{
	unsigned int num_ccs = hweight32(CCS_MASK(gt));

	/* A single compute engine leaves nothing to distribute */
	return num_ccs > 1;
}

#endif

8 changes: 8 additions & 0 deletions drivers/gpu/drm/xe/xe_gt_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,14 @@ struct xe_gt {
spinlock_t lock;
} tlb_invalidation;

/**
* @ccs_mode: Number of compute engines enabled.
* Allows fixed mapping of available compute slices to compute engines.
* By default only the first available compute engine is enabled and all
* available compute slices are allocated to it.
*/
u32 ccs_mode;

/** @usm: unified shared memory state */
struct {
/**
Expand Down
3 changes: 3 additions & 0 deletions drivers/gpu/drm/xe/xe_guc_ads.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_guc.h"
#include "xe_hw_engine.h"
#include "xe_lrc.h"
Expand Down Expand Up @@ -440,6 +441,8 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
{ .reg = RING_HWS_PGA(hwe->mmio_base), },
{ .reg = RING_IMR(hwe->mmio_base), },
{ .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain },
{ .reg = CCS_MODE,
.skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
};
u32 i;

Expand Down
20 changes: 20 additions & 0 deletions drivers/gpu/drm/xe/xe_hw_engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "xe_execlist.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
#include "xe_gt_topology.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
Expand Down Expand Up @@ -282,6 +283,13 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}

static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
return xe_gt_ccs_mode_enabled(gt) &&
xe_rtp_match_first_render_or_compute(gt, hwe);
}

void
xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
{
Expand All @@ -306,6 +314,12 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe)
blit_cctl_val,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
/* Use Fixed slice CCS mode */
{ XE_RTP_NAME("RCU_MODE_FIXED_SLICE_CCS_MODE"),
XE_RTP_RULES(FUNC(xe_hw_engine_match_fixed_cslice_mode)),
XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE,
RCU_MODE_FIXED_SLICE_CCS_MODE))
},
{}
};

Expand Down Expand Up @@ -859,6 +873,12 @@ bool xe_hw_engine_is_reserved(struct xe_hw_engine *hwe)
if (hwe->class == XE_ENGINE_CLASS_OTHER)
return true;

/* Check for engines disabled by ccs_mode setting */
if (xe_gt_ccs_mode_enabled(gt) &&
hwe->class == XE_ENGINE_CLASS_COMPUTE &&
hwe->logical_instance >= gt->ccs_mode)
return true;

return xe->info.supports_usm && hwe->class == XE_ENGINE_CLASS_COPY &&
hwe->instance == gt->usm.reserved_bcs_instance;
}

0 comments on commit 0d97ecc

Please sign in to comment.