diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index a2d8167134892..30f9e8cb328cc 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -17,6 +17,7 @@ #include "xe_force_wake.h" #include "xe_gt.h" #include "xe_gt_printk.h" +#include "xe_guc_capture.h" #include "xe_guc_ct.h" #include "xe_guc_log.h" #include "xe_guc_submit.h" @@ -134,6 +135,9 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) xe_guc_ct_snapshot_free(ss->guc.ct); ss->guc.ct = NULL; + xe_guc_capture_put_matched_nodes(&ss->gt->uc.guc); + ss->matched_node = NULL; + xe_guc_exec_queue_snapshot_free(ss->ge); ss->ge = NULL; @@ -217,6 +221,7 @@ static void xe_devcoredump_free(void *data) /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); coredump->captured = false; + coredump->job = NULL; drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n"); } @@ -227,8 +232,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; u32 adj_logical_mask = q->logical_mask; u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; @@ -244,6 +247,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, strscpy(ss->process_name, process_name); ss->gt = q->gt; + coredump->job = job; INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); @@ -266,14 +270,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->job = xe_sched_job_snapshot_capture(job); ss->vm = xe_vm_snapshot_capture(q->vm); - for_each_hw_engine(hwe, q->gt, id) { - if (hwe->class != q->hwe->class || - !(BIT(hwe->logical_instance) & adj_logical_mask)) { - ss->hwe[id] = 
NULL; - continue; - } - ss->hwe[id] = xe_hw_engine_snapshot_capture(hwe); - } + xe_engine_snapshot_capture_for_job(job); queue_work(system_unbound_wq, &ss->work); diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 06ac75ce63dd4..3703ddea12528 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -49,6 +49,12 @@ struct xe_devcoredump_snapshot { struct xe_hw_engine_snapshot *hwe[XE_NUM_HW_ENGINES]; /** @job: Snapshot of job state */ struct xe_sched_job_snapshot *job; + /** + * @matched_node: The matched capture node for timedout job + * this single-node tracker works because devcoredump will always only + * produce one hw-engine capture per devcoredump event + */ + struct __guc_capture_parsed_output *matched_node; /** @vm: Snapshot of VM state */ struct xe_vm_snapshot *vm; @@ -74,6 +80,8 @@ struct xe_devcoredump { bool captured; /** @snapshot: Snapshot is captured at time of the first crash */ struct xe_devcoredump_snapshot snapshot; + /** @job: Point to the faulting job */ + struct xe_sched_job *job; }; #endif diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c index e21eb6c0fc204..5013d674e17da 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.c +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c @@ -365,6 +365,19 @@ void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, *instance = dss % gt->steering_dss_per_grp; } +/** + * xe_gt_mcr_steering_info_to_dss_id - Get DSS ID from group/instance steering + * @gt: GT structure + * @group: steering group ID + * @instance: steering instance ID + * + * Return: the coverted DSS id. 
+ */ +u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance) +{ + return group * dss_per_group(gt) + instance; +} + static void init_steering_dss(struct xe_gt *gt) { gt->steering_dss_per_grp = dss_per_group(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h index 8d119a0d54938..c0cd36021c246 100644 --- a/drivers/gpu/drm/xe/xe_gt_mcr.h +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h @@ -28,6 +28,7 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg, void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p); void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance); +u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance); /* * Loop over each DSS and determine the group and instance IDs that diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index d5cea907d2e68..3853b778c4f0f 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -27,11 +27,17 @@ #include "xe_guc_capture.h" #include "xe_guc_capture_types.h" #include "xe_guc_ct.h" +#include "xe_guc_exec_queue_types.h" #include "xe_guc_log.h" +#include "xe_guc_submit_types.h" #include "xe_guc_submit.h" #include "xe_hw_engine_types.h" +#include "xe_hw_engine.h" +#include "xe_lrc.h" #include "xe_macros.h" #include "xe_map.h" +#include "xe_mmio.h" +#include "xe_sched_job.h" /* * struct __guc_capture_bufstate @@ -69,6 +75,9 @@ struct __guc_capture_parsed_output { u32 eng_inst; u32 guc_id; u32 lrca; + u32 type; + bool locked; + enum xe_hw_engine_snapshot_source_id source; struct gcap_reg_list_info { u32 vfid; u32 num_regs; @@ -275,6 +284,10 @@ struct xe_guc_state_capture { struct list_head outlist; }; +static void +guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc, + struct __guc_capture_parsed_output *node); + static const struct __guc_mmio_reg_descr_group * 
guc_capture_get_device_reglist(struct xe_device *xe)
 {
@@ -303,6 +316,36 @@ guc_capture_get_one_list(const struct __guc_mmio_reg_descr_group *reglists,
 	return NULL;
 }
 
+/**
+ * xe_guc_capture_get_reg_desc_list - Look up one register descriptor list
+ * @gt: GT whose GuC capture state and device are used for the lookup
+ * @owner: owner index passed on to guc_capture_get_one_list()
+ * @type: capture type passed on to guc_capture_get_one_list()
+ * @capture_class: capture class passed on to guc_capture_get_one_list()
+ * @is_ext: search guc->capture->extlists instead of the device register lists
+ *
+ * Picks the table to search - the GuC's extlists when @is_ext is set, the
+ * result of guc_capture_get_device_reglist() otherwise - and looks up the
+ * entry for @owner, @type and @capture_class in it.
+ *
+ * Return: the result of guc_capture_get_one_list() on the selected table.
+ */
+const struct __guc_mmio_reg_descr_group *
+xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type,
+				 enum guc_capture_list_class_type capture_class, bool is_ext)
+{
+	const struct __guc_mmio_reg_descr_group *reglists;
+
+	if (is_ext) {
+		struct xe_guc *guc = &gt->uc.guc;
+
+		reglists = guc->capture->extlists;
+	} else {
+		reglists = guc_capture_get_device_reglist(gt_to_xe(gt));
+	}
+	return guc_capture_get_one_list(reglists, owner, type, capture_class);
+}
+
 struct __ext_steer_reg {
 	const char *name;
 	struct xe_reg_mcr reg;
@@ -814,13 +857,14 @@ static void
 guc_capture_add_node_to_list(struct __guc_capture_parsed_output *node,
 			     struct list_head *list)
 {
-	list_add_tail(&node->link, list);
+	list_add(&node->link, list);
 }
 
 static void
 guc_capture_add_node_to_outlist(struct xe_guc_state_capture *gc,
 				struct __guc_capture_parsed_output *node)
 {
+	guc_capture_remove_stale_matches_from_list(gc, node);
 	guc_capture_add_node_to_list(node, &gc->outlist);
 }
 
@@ -831,6 +875,39 @@ guc_capture_add_node_to_cachelist(struct xe_guc_state_capture *gc,
 	guc_capture_add_node_to_list(node, &gc->cachelist);
 }
 
+/*
+ * Clear the locked flag of @n, unlink it from its current list and put it
+ * back on the cachelist. A NULL @n is ignored.
+ */
+static void
+guc_capture_free_outlist_node(struct xe_guc_state_capture *gc,
+			      struct __guc_capture_parsed_output *n)
+{
+	if (n) {
+		n->locked = false;
+		list_del(&n->link);
+		/* put node back to cache list */
+		guc_capture_add_node_to_cachelist(gc, n);
+	}
+}
+
+/*
+ * Recycle every unlocked outlist node, other than @node itself, that has the
+ * same guc_id as @node.
+ */
+static void
+guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
+					   struct __guc_capture_parsed_output *node)
+{
+	struct __guc_capture_parsed_output *n, *ntmp;
+	u32 guc_id = node->guc_id;
+
+	list_for_each_entry_safe(n, ntmp, &gc->outlist, link) {
+		if (n != node && !n->locked && n->guc_id == guc_id)
+			guc_capture_free_outlist_node(gc, n);
+	}
+}
+
 static void
 guc_capture_init_node(struct xe_guc *guc, struct 
__guc_capture_parsed_output *node)
 {
@@ -1026,9 +1081,13 @@ guc_capture_get_prealloc_node(struct xe_guc *guc)
 	} else {
 		struct __guc_capture_parsed_output *n, *ntmp;
 
-		/* traverse down and steal back the oldest node already allocated */
-		list_for_each_entry_safe(n, ntmp, &guc->capture->outlist, link) {
-			found = n;
+		/*
+		 * traverse reversed and steal back the oldest node already
+		 * allocated
+		 */
+		list_for_each_entry_safe_reverse(n, ntmp, &guc->capture->outlist, link) {
+			if (!n->locked)
+				found = n;
 		}
 	}
 	if (found) {
@@ -1221,6 +1280,8 @@ guc_capture_extract_reglists(struct xe_guc *guc, struct __guc_capture_bufstate *
 		}
 		node->is_partial = is_partial;
 		node->reginfo[datatype].vfid = FIELD_GET(GUC_STATE_CAPTURE_HEADER_VFID, hdr.owner);
+		node->source = XE_ENGINE_CAPTURE_SOURCE_GUC;
+		node->type = datatype;
 
 		switch (datatype) {
 		case GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE:
@@ -1463,6 +1524,277 @@ guc_capture_create_prealloc_nodes(struct xe_guc *guc)
 	__guc_capture_create_prealloc_nodes(guc);
 }
 
+/*
+ * Return the entry of @reginfo whose offset equals @addr and whose flags equal
+ * @flags, or NULL when @reginfo is NULL, holds no registers or has no match.
+ */
+static struct guc_mmio_reg *
+guc_capture_find_reg(struct gcap_reg_list_info *reginfo, u32 addr, u32 flags)
+{
+	u32 i;
+
+	if (reginfo && reginfo->num_regs > 0) {
+		struct guc_mmio_reg *regs = reginfo->regs;
+
+		if (regs)
+			for (i = 0; i < reginfo->num_regs; i++)
+				if (regs[i].offset == addr && regs[i].flags == flags)
+					return &regs[i];
+	}
+
+	return NULL;
+}
+
+/*
+ * Print the @type registers held by the devcoredump's matched capture node.
+ * The static descriptor @list drives the loop, so the output follows the
+ * descriptor order; descriptors with no captured value are skipped.
+ */
+static void
+snapshot_print_by_list_order(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p,
+			     u32 type, const struct __guc_mmio_reg_descr_group *list)
+{
+	struct xe_gt *gt = snapshot->hwe->gt;
+	struct xe_device *xe = gt_to_xe(gt);
+	struct xe_guc *guc = &gt->uc.guc;
+	struct xe_devcoredump *devcoredump = &xe->devcoredump;
+	struct xe_devcoredump_snapshot *devcore_snapshot = &devcoredump->snapshot;
+	struct gcap_reg_list_info *reginfo = NULL;
+	u32 last_value = 0;	/* low dword of a 64-bit pair, 0 if not captured */
+	u32 i;
+	bool is_ext;
+
+	if (!list || list->num_regs == 0)
+		return;
+	if (XE_WARN_ON(!devcore_snapshot->matched_node))
+		return;
+
+	is_ext = list == guc->capture->extlists;
+	reginfo = &devcore_snapshot->matched_node->reginfo[type];
+
+	/*
+	 * loop through descriptor first and find the register in the node
+	 * this is more scalable for developer maintenance as it will ensure
+	 * the printout matched the ordering of the static descriptor
+	 * table-of-lists
+	 */
+	for (i = 0; i < list->num_regs; i++) {
+		const struct __guc_mmio_reg_descr *reg_desc = &list->list[i];
+		struct guc_mmio_reg *reg;
+		u32 value;
+
+		reg = guc_capture_find_reg(reginfo, reg_desc->reg.addr, reg_desc->flags);
+		if (!reg)
+			continue;
+
+		value = reg->value;
+		if (reg_desc->data_type == REG_64BIT_LOW_DW) {
+			last_value = value;
+			/* Low 32 bit dword saved, continue for high 32 bit */
+			continue;
+		} else if (reg_desc->data_type == REG_64BIT_HI_DW) {
+			u64 value_qw = ((u64)value << 32) | last_value;
+
+			drm_printf(p, "\t%s: 0x%016llx\n", reg_desc->regname, value_qw);
+			continue;
+		}
+
+		if (is_ext) {
+			u32 dss, group, instance;
+
+			group = FIELD_GET(GUC_REGSET_STEERING_GROUP, reg_desc->flags);
+			instance = FIELD_GET(GUC_REGSET_STEERING_INSTANCE, reg_desc->flags);
+			dss = xe_gt_mcr_steering_info_to_dss_id(gt, group, instance);
+
+			drm_printf(p, "\t%s[%u]: 0x%08x\n", reg_desc->regname, dss, value);
+		} else {
+			drm_printf(p, "\t%s: 0x%08x\n", reg_desc->regname, value);
+		}
+	}
+}
+
+/**
+ * xe_engine_guc_capture_print - Print out a given Xe HW Engine snapshot.
+ * @snapshot: Xe HW Engine snapshot object.
+ * @p: drm_printer where it will be printed out.
+ *
+ * This function prints out a given Xe HW Engine snapshot object.
+ */
+void xe_engine_guc_capture_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p)
+{
+	static const char * const grptype[GUC_STATE_CAPTURE_GROUP_TYPE_MAX] = {
+		"full-capture",
+		"partial-capture"
+	};
+	int type;
+	const struct __guc_mmio_reg_descr_group *list;
+	enum guc_capture_list_class_type capture_class;
+	struct xe_gt *gt;
+	struct xe_device *xe;
+	struct xe_devcoredump *devcoredump;
+	struct xe_devcoredump_snapshot *devcore_snapshot;
+
+	/* hwe is dereferenced right below: bail out instead of asserting later */
+	if (!snapshot || !snapshot->hwe)
+		return;
+
+	gt = snapshot->hwe->gt;
+	xe = gt_to_xe(gt);
+	devcoredump = &xe->devcoredump;
+	devcore_snapshot = &devcoredump->snapshot;
+
+	if (!devcore_snapshot->matched_node)
+		return;
+
+	xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC);
+
+	capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class);
+
+	drm_printf(p, "%s (physical), logical instance=%d\n",
+		   snapshot->name ? snapshot->name : "",
+		   snapshot->logical_instance);
+	drm_printf(p, "\tCapture_source: %s\n",
+		   snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual");
+	drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]);
+	drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n",
+		   snapshot->forcewake.domain, snapshot->forcewake.ref);
+
+	for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
+		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
+							capture_class, false);
+		snapshot_print_by_list_order(snapshot, p, type, list);
+	}
+
+	/* The render/compute class additionally prints its is_ext list */
+	if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) {
+		list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF,
+							GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+							capture_class, true);
+		snapshot_print_by_list_order(snapshot, p, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
+					     list);
+	}
+
+	drm_puts(p, "\n");
+}
+
+/**
+ * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job.
+ * @job: The job object.
+ *
+ * Return the GuC-sourced node already recorded in the devcoredump snapshot,
+ * if any; otherwise search the capture outlist by engine, guc_id and LRCA.
+ * A node found on the outlist has its locked flag set before it is returned.
+ *
+ * Returns: found guc-capture node ptr else NULL
+ */
+struct __guc_capture_parsed_output *
+xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job)
+{
+	struct xe_hw_engine *hwe;
+	enum xe_hw_engine_id id;
+	struct xe_exec_queue *q;
+	struct xe_device *xe;
+	u16 guc_class = GUC_LAST_ENGINE_CLASS + 1;
+	struct xe_devcoredump_snapshot *ss;
+
+	if (!job)
+		return NULL;
+
+	q = job->q;
+	if (!q || !q->gt)
+		return NULL;
+
+	xe = gt_to_xe(q->gt);
+	if (xe->wedged.mode >= 2 || !xe_device_uc_enabled(xe))
+		return NULL;
+
+	ss = &xe->devcoredump.snapshot;
+	if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC)
+		return ss->matched_node;
+
+	/* Find hwe for the job; guc_class stays out of range when none matches */
+	for_each_hw_engine(hwe, q->gt, id) {
+		if (hwe != q->hwe)
+			continue;
+		guc_class = xe_engine_class_to_guc_class(hwe->class);
+		break;
+	}
+
+	if (guc_class <= GUC_LAST_ENGINE_CLASS) {
+		struct __guc_capture_parsed_output *n;
+		struct xe_guc *guc = &q->gt->uc.guc;
+		u16 guc_id = q->guc->id;
+		u32 lrca = xe_lrc_ggtt_addr(q->lrc[0]);
+
+		/*
+		 * Look for a matching GuC reported error capture node from
+		 * the internal output link-list based on engine, guc id and
+		 * lrca info. Nothing is unlinked here, so the plain iterator is enough.
+		 */
+		list_for_each_entry(n, &guc->capture->outlist, link) {
+			if (n->eng_class == guc_class && n->eng_inst == hwe->instance &&
+			    n->guc_id == guc_id && n->lrca == lrca &&
+			    n->source == XE_ENGINE_CAPTURE_SOURCE_GUC) {
+				n->locked = true;
+				return n;
+			}
+		}
+	}
+	return NULL;
+}
+
+/**
+ * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine
+ * @job: The job object
+ *
+ * Take snapshot of associated HW Engine
+ *
+ * Returns: None.
+ */ +void +xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) +{ + struct xe_exec_queue *q = job->q; + struct xe_device *xe = gt_to_xe(q->gt); + struct xe_devcoredump *coredump = &xe->devcoredump; + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + u32 adj_logical_mask = q->logical_mask; + + for_each_hw_engine(hwe, q->gt, id) { + if (hwe->class != q->hwe->class || + !(BIT(hwe->logical_instance) & adj_logical_mask)) { + coredump->snapshot.hwe[id] = NULL; + continue; + } + + if (!coredump->snapshot.hwe[id]) + coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job); + + break; + } +} + +/* + * xe_guc_capture_put_matched_nodes - Clean up matched nodes + * @guc: The GuC object + * + * Move the matched node and every unlocked outlist node sharing its guc_id + * back to the cachelist, then clear the matched_node pointer of the snapshot. + */ +void xe_guc_capture_put_matched_nodes(struct xe_guc *guc) +{ + struct xe_device *xe = guc_to_xe(guc); + struct xe_devcoredump *devcoredump = &xe->devcoredump; + struct __guc_capture_parsed_output *n = devcoredump->snapshot.matched_node; + + if (n) { + guc_capture_remove_stale_matches_from_list(guc->capture, n); + guc_capture_free_outlist_node(guc->capture, n); + devcoredump->snapshot.matched_node = NULL; + } +} + /* * xe_guc_capture_steered_list_init - Init steering register list * @guc: The GuC object diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h index 4acf44472a63c..fe695ab08a749 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.h +++ b/drivers/gpu/drm/xe/xe_guc_capture.h @@ -12,6 +12,9 @@ #include "xe_guc_fwif.h" struct xe_guc; +struct xe_hw_engine; +struct xe_hw_engine_snapshot; +struct xe_sched_job; static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class) { @@ -44,7 +47,14 @@ int xe_guc_capture_getlistsize(struct xe_guc *guc, u32 owner, u32 type, enum guc_capture_list_class_type capture_class, size_t *size); int xe_guc_capture_getnullheader(struct xe_guc *guc, void **outptr, size_t *size); size_t 
xe_guc_capture_ads_input_worst_size(struct xe_guc *guc); +const struct __guc_mmio_reg_descr_group * +xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type, + enum guc_capture_list_class_type capture_class, bool is_ext); +struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job); +void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job); +void xe_engine_guc_capture_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p); void xe_guc_capture_steered_list_init(struct xe_guc *guc); +void xe_guc_capture_put_matched_nodes(struct xe_guc *guc); int xe_guc_capture_init(struct xe_guc *guc); #endif diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 4b6e953fb210a..c1ebc693a6172 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1097,6 +1097,7 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_guc *guc = exec_queue_to_guc(q); const char *process_name = "no process"; + struct xe_device *xe = guc_to_xe(guc); int err = -ETIME; pid_t pid = -1; int i = 0; @@ -1124,6 +1125,21 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!skip_timeout_check && !xe_sched_job_started(job)) goto rearm; + /* + * If devcoredump not captured and GuC capture for the job is not ready + * do manual capture first and decide later if we need to use it + */ + if (!exec_queue_killed(q) && !xe->devcoredump.captured && + !xe_guc_capture_get_matching_and_lock(job)) { + /* take force wake before engine register manual capture */ + if (xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL)) + xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); + + xe_engine_snapshot_capture_for_job(job); + + xe_force_wake_put(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); + } + /* * XXX: Sampling timeout doesn't work in wedged mode as we have to * modify scheduling state to read 
timestamp. We could read the @@ -2007,8 +2023,6 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) xe_gt_info(gt, "Engine reset: engine_class=%s, logical_mask: 0x%x, guc_id=%d", xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id); - /* FIXME: Do error capture, most likely async */ - trace_xe_exec_queue_reset(q); /* @@ -2034,7 +2048,7 @@ int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) * XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION to host, this function will be * called 1st to check status before process the data comes with the message. * - * Returns: None + * Returns: error code. 0 if success */ int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) { diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 6c9c27304cdcd..18a06c2f78d17 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -24,6 +24,7 @@ #include "xe_gt_printk.h" #include "xe_gt_mcr.h" #include "xe_gt_topology.h" +#include "xe_guc_capture.h" #include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_irq.h" @@ -877,9 +878,69 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, } } +static void +xe_hw_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot) +{ + u64 val; + + snapshot->reg.ring_execlist_status = + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + snapshot->reg.ring_execlist_status |= val << 32; + + snapshot->reg.ring_execlist_sq_contents = + xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + snapshot->reg.ring_execlist_sq_contents |= val << 32; + + snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd |= val << 32; + + 
snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr |= val << 32; + + snapshot->reg.ring_dma_fadd = + xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + snapshot->reg.ring_dma_fadd |= val << 32; + + snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); + snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); + if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { + val = xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); + snapshot->reg.ring_start |= val << 32; + } + if (xe_gt_has_indirect_ring_state(hwe->gt)) { + snapshot->reg.indirect_ring_state = + xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + } + + snapshot->reg.ring_head = + xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; + snapshot->reg.ring_tail = + xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; + snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); + snapshot->reg.ring_mi_mode = + xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); + snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); + snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); + snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); + snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); + snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); + snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); + xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); + + if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) + snapshot->reg.rcu_mode = xe_mmio_read32(&hwe->gt->mmio, RCU_MODE); +} + /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. + * @job: The job object. 
* * This can be printed out in a later stage like during dev_coredump * analysis. @@ -888,11 +949,11 @@ xe_hw_engine_snapshot_instdone_capture(struct xe_hw_engine *hwe, * caller, using `xe_hw_engine_snapshot_free`. */ struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job) { struct xe_hw_engine_snapshot *snapshot; size_t len; - u64 val; + struct __guc_capture_parsed_output *node; if (!xe_hw_engine_is_valid(hwe)) return NULL; @@ -937,58 +998,24 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) return snapshot; - snapshot->reg.ring_execlist_status = - xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); - snapshot->reg.ring_execlist_status |= val << 32; - - snapshot->reg.ring_execlist_sq_contents = - xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); - val = xe_hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); - snapshot->reg.ring_execlist_sq_contents |= val << 32; - - snapshot->reg.ring_acthd = xe_hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - val = xe_hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); - snapshot->reg.ring_acthd |= val << 32; - - snapshot->reg.ring_bbaddr = xe_hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - val = xe_hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); - snapshot->reg.ring_bbaddr |= val << 32; - - snapshot->reg.ring_dma_fadd = - xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); - val = xe_hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); - snapshot->reg.ring_dma_fadd |= val << 32; - - snapshot->reg.ring_hwstam = xe_hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = xe_hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); - snapshot->reg.ring_start = xe_hw_engine_mmio_read32(hwe, RING_START(0)); - if (GRAPHICS_VERx100(hwe->gt->tile->xe) >= 2000) { - val = 
xe_hw_engine_mmio_read32(hwe, RING_START_UDW(0)); - snapshot->reg.ring_start |= val << 32; - } - if (xe_gt_has_indirect_ring_state(hwe->gt)) { - snapshot->reg.indirect_ring_state = - xe_hw_engine_mmio_read32(hwe, INDIRECT_RING_STATE(0)); + if (job) { + /* If got guc capture, set source to GuC */ + node = xe_guc_capture_get_matching_and_lock(job); + if (node) { + struct xe_device *xe = gt_to_xe(hwe->gt); + struct xe_devcoredump *coredump = &xe->devcoredump; + + coredump->snapshot.matched_node = node; + snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC; + xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node"); + return snapshot; + } } - snapshot->reg.ring_head = - xe_hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; - snapshot->reg.ring_tail = - xe_hw_engine_mmio_read32(hwe, RING_TAIL(0)) & TAIL_ADDR; - snapshot->reg.ring_ctl = xe_hw_engine_mmio_read32(hwe, RING_CTL(0)); - snapshot->reg.ring_mi_mode = - xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0)); - snapshot->reg.ring_mode = xe_hw_engine_mmio_read32(hwe, RING_MODE(0)); - snapshot->reg.ring_imr = xe_hw_engine_mmio_read32(hwe, RING_IMR(0)); - snapshot->reg.ring_esr = xe_hw_engine_mmio_read32(hwe, RING_ESR(0)); - snapshot->reg.ring_emr = xe_hw_engine_mmio_read32(hwe, RING_EMR(0)); - snapshot->reg.ring_eir = xe_hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ipehr = xe_hw_engine_mmio_read32(hwe, RING_IPEHR(0)); - xe_hw_engine_snapshot_instdone_capture(hwe, snapshot); - - if (snapshot->hwe->class == XE_ENGINE_CLASS_COMPUTE) - snapshot->reg.rcu_mode = xe_mmio_read32(&hwe->gt->mmio, RCU_MODE); + /* otherwise, do manual capture */ + xe_hw_engine_manual_capture(hwe, snapshot); + snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL; + xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot"); return snapshot; } @@ -1036,19 +1063,9 @@ xe_hw_engine_snapshot_instdone_print(struct xe_hw_engine_snapshot *snapshot, str } } -/** - * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. 
- * @snapshot: Xe HW Engine snapshot object. - * @p: drm_printer where it will be printed out. - * - * This function prints out a given Xe HW Engine snapshot object. - */ -void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, - struct drm_printer *p) +static void __xe_hw_engine_manual_print(struct xe_hw_engine_snapshot *snapshot, + struct drm_printer *p) { - if (!snapshot) - return; - drm_printf(p, "%s (physical), logical instance=%d\n", snapshot->name ? snapshot->name : "", snapshot->logical_instance); @@ -1086,6 +1103,24 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->reg.rcu_mode); } +/** + * xe_hw_engine_snapshot_print - Print out a given Xe HW Engine snapshot. + * @snapshot: Xe HW Engine snapshot object. + * @p: drm_printer where it will be printed out. + * + * This function prints out a given Xe HW Engine snapshot object. + */ +void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, + struct drm_printer *p) +{ + if (!snapshot) + return; + + if (snapshot->source == XE_ENGINE_CAPTURE_SOURCE_MANUAL) + __xe_hw_engine_manual_print(snapshot, p); + else + xe_engine_guc_capture_print(snapshot, p); +} /** * xe_hw_engine_snapshot_free - Free all allocated objects for a given snapshot. * @snapshot: Xe HW Engine snapshot object. 
@@ -1119,7 +1154,7 @@ void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p) { struct xe_hw_engine_snapshot *snapshot; - snapshot = xe_hw_engine_snapshot_capture(hwe); + snapshot = xe_hw_engine_snapshot_capture(hwe, NULL); xe_hw_engine_snapshot_print(snapshot, p); xe_hw_engine_snapshot_free(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index 022819a4a8ebb..c2428326a366b 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -11,6 +11,7 @@ struct drm_printer; struct drm_xe_engine_class_instance; struct xe_device; +struct xe_sched_job; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -54,9 +55,8 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec); void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, enum xe_engine_class engine_class); - struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe); +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job); void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot); void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index be60edb3e673a..55805c78d9d17 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -152,6 +152,11 @@ struct xe_hw_engine { struct xe_hw_engine_group *hw_engine_group; }; +enum xe_hw_engine_snapshot_source_id { + XE_ENGINE_CAPTURE_SOURCE_MANUAL, + XE_ENGINE_CAPTURE_SOURCE_GUC +}; + /** * struct xe_hw_engine_snapshot - Hardware engine snapshot * @@ -160,6 +165,8 @@ struct xe_hw_engine { struct xe_hw_engine_snapshot { /** @name: name of the hw engine */ char *name; + /** @source: Data source, either manual or GuC */ + enum 
xe_hw_engine_snapshot_source_id source; /** @hwe: hw engine */ struct xe_hw_engine *hwe; /** @logical_instance: logical instance of this hw engine */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index f0976230012aa..4f64c7f4e68de 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -38,24 +38,6 @@ #define LRC_INDIRECT_RING_STATE_SIZE SZ_4K -struct xe_lrc_snapshot { - struct xe_bo *lrc_bo; - void *lrc_snapshot; - unsigned long lrc_size, lrc_offset; - - u32 context_desc; - u32 indirect_context_desc; - u32 head; - struct { - u32 internal; - u32 memory; - } tail; - u32 start_seqno; - u32 seqno; - u32 ctx_timestamp; - u32 ctx_job_timestamp; -}; - static struct xe_device * lrc_to_xe(struct xe_lrc *lrc) { diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index c24542e893186..40d8f6906d3e4 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -17,9 +17,26 @@ enum xe_engine_class; struct xe_gt; struct xe_hw_engine; struct xe_lrc; -struct xe_lrc_snapshot; struct xe_vm; +struct xe_lrc_snapshot { + struct xe_bo *lrc_bo; + void *lrc_snapshot; + unsigned long lrc_size, lrc_offset; + + u32 context_desc; + u32 indirect_context_desc; + u32 head; + struct { + u32 internal; + u32 memory; + } tail; + u32 start_seqno; + u32 seqno; + u32 ctx_timestamp; + u32 ctx_job_timestamp; +}; + #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4) struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,