Skip to content

Commit

Permalink
drm/xe/guc: Add capture size check in GuC log buffer
Browse files Browse the repository at this point in the history
Capture nodes generated by GuC are placed in the GuC capture ring
buffer, which is a sub-region of the larger GuC log buffer.
Add a capture output size check before allocating the shared buffer.

Signed-off-by: Zhanjun Dong <zhanjun.dong@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241004193428.3311145-4-zhanjun.dong@intel.com
  • Loading branch information
Zhanjun Dong authored and Matt Roper committed Oct 8, 2024
1 parent b170d69 commit 84d15f4
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 2 deletions.
20 changes: 20 additions & 0 deletions drivers/gpu/drm/xe/abi/guc_log_abi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2024 Intel Corporation
*/

#ifndef _ABI_GUC_LOG_ABI_H
#define _ABI_GUC_LOG_ABI_H

#include <linux/types.h>

/*
 * GuC logging buffer types.
 *
 * The numeric values are spelled out because this header lives under abi/;
 * keep them stable and keep GUC_LOG_BUFFER_TYPE_MAX equal to the number of
 * enumerators.
 */
enum guc_log_buffer_type {
	GUC_LOG_BUFFER_CRASH_DUMP = 0,
	GUC_LOG_BUFFER_DEBUG = 1,
	GUC_LOG_BUFFER_CAPTURE = 2,
};

/* Number of buffer types listed in enum guc_log_buffer_type */
#define GUC_LOG_BUFFER_TYPE_MAX 3

#endif
83 changes: 82 additions & 1 deletion drivers/gpu/drm/xe/xe_guc_capture.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_guc_ads.h"
#include "xe_guc_capture.h"
#include "xe_guc_capture_types.h"
#include "xe_guc_ct.h"
Expand Down Expand Up @@ -669,6 +670,85 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc)
return PAGE_ALIGN(total_size);
}

/*
 * guc_capture_output_size_est - Estimate the capture output for all engines
 * @guc: The GuC object
 *
 * Models a burst of unrelated failures in which every engine instance on the
 * GT produces its own error-capture dump, one after the other. In that case
 * the global registers are dumped again for each engine, so they are counted
 * once per engine here as well.
 *
 * Per engine instance the estimate is one capture group header followed by
 * three capture lists (global, engine-class and engine-instance registers),
 * each preceded by its own capture header. A list whose size lookup returns
 * non-zero contributes nothing to the total.
 *
 * Return: the estimated size, or -ENODEV if guc->capture is not set.
 */
static int guc_capture_output_size_est(struct xe_guc *guc)
{
	struct xe_gt *gt = guc_to_gt(guc);
	enum xe_hw_engine_id id;
	struct xe_hw_engine *hwe;
	size_t list_size = 0;
	int total = 0;

	if (!guc->capture)
		return -ENODEV;

	for_each_hw_engine(hwe, gt, id) {
		enum guc_capture_list_class_type guc_class;

		guc_class = xe_engine_class_to_guc_capture_class(hwe->class);

		/* One group header plus one list header per register type */
		total += sizeof(struct guc_state_capture_group_header_t) +
			 (3 * sizeof(struct guc_state_capture_header_t));

		/* Only a zero return from the lookup makes list_size usable */
		if (guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL,
					    0, &list_size, true) == 0)
			total += list_size;

		if (guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
					    guc_class, &list_size, true) == 0)
			total += list_size;

		if (guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
					    guc_class, &list_size, true) == 0)
			total += list_size;
	}

	return total;
}

/*
* Add on a 3x multiplier to allow for multiple back-to-back captures occurring
* before Xe can read the data out and process it.
*/
#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3

/*
 * check_guc_capture_size - Compare the capture region with the estimated need
 * @guc: The GuC object
 *
 * Purely diagnostic: emits at most one debug message and never fails. The
 * three cases are mutually exclusive:
 *  - the estimate could not be computed (negative errno),
 *  - the capture region is smaller than a single worst-case burst,
 *  - the capture region cannot hold the over-buffered (multiplied) estimate.
 */
static void check_guc_capture_size(struct xe_guc *guc)
{
	int capture_size = guc_capture_output_size_est(guc);
	int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
	u32 buffer_size = xe_guc_log_section_size_capture(&guc->log);

	/*
	 * NOTE: capture_size is much smaller than the capture region
	 * allocation (DG2: <80K vs 1MB).
	 * Additionally, it is based on the space needed to fit all engines
	 * getting reset at once within the same G2H handler task slot. This is
	 * very unlikely. However, if GuC really does run out of space for
	 * whatever reason, we will see a separate warning message when
	 * processing the G2H event capture-notification, search for:
	 * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
	 */
	if (capture_size < 0)
		xe_gt_dbg(guc_to_gt(guc),
			  "Failed to calculate error state capture buffer minimum size: %d!\n",
			  capture_size);
	/*
	 * Must be chained with "else": buffer_size is unsigned, so a negative
	 * capture_size would be converted to a huge value in the comparison
	 * below and trigger a second, misleading message.
	 */
	else if (capture_size > buffer_size)
		xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n",
			  buffer_size, capture_size);
	else if (spare_size > buffer_size)
		xe_gt_dbg(guc_to_gt(guc),
			  "Error state capture buffer lacks spare size: %d < %d (min = %d)\n",
			  buffer_size, spare_size, capture_size);
}

/*
* xe_guc_capture_steered_list_init - Init steering register list
* @guc: The GuC object
Expand All @@ -684,9 +764,10 @@ void xe_guc_capture_steered_list_init(struct xe_guc *guc)
* the end of the pre-populated render list.
*/
guc_capture_alloc_steered_lists(guc);
check_guc_capture_size(guc);
}

/**
/*
* xe_guc_capture_init - Init for GuC register capture
* @guc: The GuC object
*
Expand Down
66 changes: 66 additions & 0 deletions drivers/gpu/drm/xe/xe_guc_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,4 +270,70 @@ int xe_guc_log_init(struct xe_guc_log *log)

return 0;
}

ALLOW_ERROR_INJECTION(xe_guc_log_init, ERRNO); /* See xe_pci_probe() */

/*
 * Size of the crash-dump section of the GuC log buffer. The value is the
 * CRASH_BUFFER_SIZE constant; @log is not consulted.
 */
static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log)
{
	return CRASH_BUFFER_SIZE;
}

/*
 * Size of the debug section of the GuC log buffer. The value is the
 * DEBUG_BUFFER_SIZE constant; @log is not consulted.
 */
static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log)
{
	return DEBUG_BUFFER_SIZE;
}

/**
 * xe_guc_log_section_size_capture - Get capture buffer size within log sections.
 * @log: The log object.
 *
 * Reports how large the capture section of the GuC log buffer is. The value
 * is the CAPTURE_BUFFER_SIZE constant; @log is not consulted.
 *
 * Return: capture buffer size.
 */
u32 xe_guc_log_section_size_capture(struct xe_guc_log *log)
{
	return CAPTURE_BUFFER_SIZE;
}

/**
 * xe_guc_get_log_buffer_size - Get log buffer size for a type.
 * @log: The log object.
 * @type: The log buffer type
 *
 * Dispatches to the per-section size helper that matches @type.
 *
 * Return: buffer size, or 0 if @type is not one of the known buffer types.
 */
u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type)
{
	u32 size = 0;

	switch (type) {
	case GUC_LOG_BUFFER_CRASH_DUMP:
		size = xe_guc_log_section_size_crash(log);
		break;
	case GUC_LOG_BUFFER_DEBUG:
		size = xe_guc_log_section_size_debug(log);
		break;
	case GUC_LOG_BUFFER_CAPTURE:
		size = xe_guc_log_section_size_capture(log);
		break;
	}

	return size;
}

/**
* xe_guc_get_log_buffer_offset - Get offset in log buffer for a type.
* @log: The log object.
* @type: The log buffer type
*
* This function will return the offset in the log buffer for a type.
* Return: buffer offset.
*/
u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type)
{
enum guc_log_buffer_type i;
u32 offset = PAGE_SIZE;/* for the log_buffer_states */

for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) {
if (i == type)
break;
offset += xe_guc_get_log_buffer_size(log, i);
}

return offset;
}
7 changes: 6 additions & 1 deletion drivers/gpu/drm/xe/xe_guc_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#define _XE_GUC_LOG_H_

#include "xe_guc_log_types.h"
#include "abi/guc_log_abi.h"

struct drm_printer;
struct xe_device;
Expand All @@ -18,7 +19,7 @@ struct xe_device;
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K
#define CAPTURE_BUFFER_SIZE SZ_16K
#define CAPTURE_BUFFER_SIZE SZ_1M
#endif
/*
* While we're using plain log level in i915, GuC controls are much more...
Expand Down Expand Up @@ -50,4 +51,8 @@ xe_guc_log_get_level(struct xe_guc_log *log)
return log->level;
}

/* GuC log buffer section size/offset helpers, defined in xe_guc_log.c */
u32 xe_guc_log_section_size_capture(struct xe_guc_log *log);
u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type);
u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type);

#endif

0 comments on commit 84d15f4

Please sign in to comment.