Skip to content

Commit

Permalink
drm/xe/guc: Prepare GuC register list and update ADS size for error c…
Browse files Browse the repository at this point in the history
…apture

Add the referenced register defines and the list of registers.
Update GuC ADS size allocation to include space for
the lists of error state capture register descriptors.

Then, populate GuC ADS with the lists of registers we want
GuC to report back to host on engine reset events. This list
should include global, engine-class and engine-instance
registers for every engine-class type on the current hardware.

Ensure we allocate persistent storage for the register lists
that are populated into ADS so that we don't need to allocate
memory during GT resets when GuC is reloaded and ADS population
happens again.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241004193428.3311145-2-zhanjun.dong@intel.com
  • Loading branch information
Zhanjun Dong authored and Matt Roper committed Oct 8, 2024
1 parent d6d87a1 commit 9c8c7a7
Show file tree
Hide file tree
Showing 12 changed files with 979 additions and 37 deletions.
1 change: 1 addition & 0 deletions drivers/gpu/drm/xe/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ xe-y += xe_bb.o \
xe_gt_topology.o \
xe_guc.o \
xe_guc_ads.o \
xe_guc_capture.o \
xe_guc_ct.o \
xe_guc_db_mgr.o \
xe_guc_hwconfig.o \
Expand Down
186 changes: 186 additions & 0 deletions drivers/gpu/drm/xe/abi/guc_capture_abi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2024 Intel Corporation
*/

#ifndef _ABI_GUC_CAPTURE_ABI_H
#define _ABI_GUC_CAPTURE_ABI_H

#include <linux/types.h>

/* Capture List Index: one index for the PF and one for the VF */
enum guc_capture_list_index_type {
GUC_CAPTURE_LIST_INDEX_PF = 0,
GUC_CAPTURE_LIST_INDEX_VF = 1,
};

/* Number of capture list indices (one past the last index, VF) */
#define GUC_CAPTURE_LIST_INDEX_MAX (GUC_CAPTURE_LIST_INDEX_VF + 1)

/*
 * Register-types of GuC capture register lists.
 *
 * Every enumerator carries an explicit value, like the other enums in this
 * ABI header.
 */
enum guc_state_capture_type {
	GUC_STATE_CAPTURE_TYPE_GLOBAL = 0,
	GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS = 1,
	GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE = 2,
};

/* Number of capture types (one past the last type, ENGINE_INSTANCE) */
#define GUC_STATE_CAPTURE_TYPE_MAX (GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE + 1)

/* Class indices for capture_class and capture_instance arrays */
enum guc_capture_list_class_type {
GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE = 0,
GUC_CAPTURE_LIST_CLASS_VIDEO = 1,
GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE = 2,
GUC_CAPTURE_LIST_CLASS_BLITTER = 3,
GUC_CAPTURE_LIST_CLASS_GSC_OTHER = 4,
};

/* Number of capture list classes (one past the last class, GSC_OTHER) */
#define GUC_CAPTURE_LIST_CLASS_MAX (GUC_CAPTURE_LIST_CLASS_GSC_OTHER + 1)

/**
* struct guc_mmio_reg - GuC MMIO reg state struct
*
* Describes a single MMIO register: the host fills in @offset and the
* firmware uses @value to store the register value.
*/
struct guc_mmio_reg {
/** @offset: MMIO Offset - filled in by Host */
u32 offset;
/** @value: MMIO Value - Used by Firmware to store value */
u32 value;
/** @flags: Flags for accessing the MMIO */
u32 flags;
/** @mask: Value of a mask to apply if mask with value is set */
u32 mask;
/*
* NOTE(review): the GUC_REGSET_* bits below look like the encoding of @flags
* (they follow @mask only by placement) - confirm against the GuC ABI.
*/
#define GUC_REGSET_MASKED BIT(0)
#define GUC_REGSET_STEERING_NEEDED BIT(1)
#define GUC_REGSET_MASKED_WITH_VALUE BIT(2)
#define GUC_REGSET_RESTORE_ONLY BIT(3)
#define GUC_REGSET_STEERING_GROUP GENMASK(16, 12)
#define GUC_REGSET_STEERING_INSTANCE GENMASK(23, 20)
} __packed;

/**
* struct guc_mmio_reg_set - GuC register sets
*
* GuC register sets.
*
* NOTE(review): presumably @address locates a list of @count register
* descriptors - confirm against the code that fills this struct.
*/
struct guc_mmio_reg_set {
/** @address: register address */
u32 address;
/** @count: register count */
u16 count;
/** @reserved: reserved */
u16 reserved;
} __packed;

/**
* struct guc_debug_capture_list_header - Debug capture list header.
*
* Header placed in front of the MMIO descriptors of a capture list
* (see struct guc_debug_capture_list).
*/
struct guc_debug_capture_list_header {
/**
* @info: contains number of MMIO descriptors in the capture list,
* in bits 15:0 (GUC_CAPTURELISTHDR_NUMDESCR).
*/
u32 info;
#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
} __packed;

/**
* struct guc_debug_capture_list - Debug capture list
*
* As part of ADS registration, these header structures (followed by
* an array of 'struct guc_mmio_reg' entries) are used to register with
* GuC microkernel the list of registers we want it to dump out prior
* to an engine reset.
*/
struct guc_debug_capture_list {
/** @header: Debug capture list header. */
struct guc_debug_capture_list_header header;
/** @regs: MMIO descriptors in the capture list (flexible array). */
struct guc_mmio_reg regs[];
} __packed;

/**
* struct guc_state_capture_header_t - State capture header.
*
* Prior to resetting engines that have hung or faulted, GuC microkernel
* reports the engine error-state (register values that were read) by
* logging them into the shared GuC log buffer using this hierarchy
* of structures.
*/
struct guc_state_capture_header_t {
/**
* @owner: VFID
* BR[ 7: 0] MBZ when SRIOV is disabled. When SRIOV is enabled
* VFID is an integer in range [0, 63] where 0 means the state capture
* is corresponding to the PF and an integer N in range [1, 63] means
* the state capture is for VF N.
*/
u32 owner;
#define GUC_STATE_CAPTURE_HEADER_VFID GENMASK(7, 0)
/** @info: Engine class/instance and capture type info */
u32 info;
#define GUC_STATE_CAPTURE_HEADER_CAPTURE_TYPE GENMASK(3, 0) /* see guc_state_capture_type */
#define GUC_STATE_CAPTURE_HEADER_ENGINE_CLASS GENMASK(7, 4) /* see guc_capture_list_class_type */
#define GUC_STATE_CAPTURE_HEADER_ENGINE_INSTANCE GENMASK(11, 8)
/**
* @lrca: logical ring context address.
* If type-instance, LRCA (address) that hung, else set to ~0
*/
u32 lrca;
/**
* @guc_id: context_index.
* If type-instance, context index of hung context, else set to ~0
*/
u32 guc_id;
/** @num_mmio_entries: Number of captured MMIO entries. */
u32 num_mmio_entries;
#define GUC_STATE_CAPTURE_HEADER_NUM_MMIO_ENTRIES GENMASK(9, 0)
} __packed;

/**
* struct guc_state_capture_t - State capture.
*
* A state capture header followed by the array of captured MMIO entries.
*/
struct guc_state_capture_t {
/** @header: State capture header. */
struct guc_state_capture_header_t header;
/** @mmio_entries: Array of captured guc_mmio_reg entries. */
struct guc_mmio_reg mmio_entries[];
} __packed;

/*
 * State Capture Group Type.
 *
 * Every enumerator carries an explicit value, like the other enums in this
 * ABI header.
 */
enum guc_state_capture_group_type {
	GUC_STATE_CAPTURE_GROUP_TYPE_FULL = 0,
	GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL = 1,
};

/* Number of group types (one past the last type, PARTIAL) */
#define GUC_STATE_CAPTURE_GROUP_TYPE_MAX (GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL + 1)

/**
* struct guc_state_capture_group_header_t - State capture group header
*
* State capture group header.
*/
struct guc_state_capture_group_header_t {
/** @owner: VFID */
u32 owner;
#define GUC_STATE_CAPTURE_GROUP_HEADER_VFID GENMASK(7, 0)
/** @info: Number of captures and capture group type info */
u32 info;
#define GUC_STATE_CAPTURE_GROUP_HEADER_NUM_CAPTURES GENMASK(7, 0)
#define GUC_STATE_CAPTURE_GROUP_HEADER_CAPTURE_GROUP_TYPE GENMASK(15, 8)
} __packed;

/**
* struct guc_state_capture_group_t - State capture group.
*
* This is the top-level structure where an error-capture dump starts:
* a group header followed by an array of state captures.
*/
struct guc_state_capture_group_t {
/** @grp_header: State capture group header. */
struct guc_state_capture_group_header_t grp_header;
/** @capture_entries: Array of state captures */
struct guc_state_capture_t capture_entries[];
} __packed;

#endif
2 changes: 2 additions & 0 deletions drivers/gpu/drm/xe/regs/xe_gt_regs.h
Original file line number Diff line number Diff line change
Expand Up @@ -567,4 +567,6 @@
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)

/* SFC_DONE(n): offset 0x1cc000 + n * 0x1000 wrapped in XE_REG(), one per index n */
#define SFC_DONE(n) XE_REG(0x1cc000 + (n) * 0x1000)

#endif
5 changes: 5 additions & 0 deletions drivers/gpu/drm/xe/xe_guc.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "xe_gt_sriov_vf.h"
#include "xe_gt_throttle.h"
#include "xe_guc_ads.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_guc_db_mgr.h"
#include "xe_guc_hwconfig.h"
Expand Down Expand Up @@ -339,6 +340,10 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;

ret = xe_guc_capture_init(guc);
if (ret)
goto out;

ret = xe_guc_ads_init(&guc->ads);
if (ret)
goto out;
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/xe/xe_guc.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,9 @@ static inline struct xe_device *guc_to_xe(struct xe_guc *guc)
return gt_to_xe(guc_to_gt(guc));
}

/**
 * guc_to_drm - Get the DRM device for a GuC
 * @guc: GuC whose xe device is looked up through guc_to_xe()
 *
 * Return: address of the drm member of that xe device.
 */
static inline struct drm_device *guc_to_drm(struct xe_guc *guc)
{
	struct drm_device *drm = &guc_to_xe(guc)->drm;

	return drm;
}

#endif
Loading

0 comments on commit 9c8c7a7

Please sign in to comment.