Skip to content

Commit

Permalink
drm/xe: Move survivability entirely to xe_pci
Browse files Browse the repository at this point in the history
There's an odd split between xe_pci.c and xe_device.c wrt
xe_survivability: it's initialized by xe_device, but then finalized by
xe_pci. Move it entirely to the outer layer, xe_pci, so it controls
the flow entirely.

This also allows us to stop ignoring some of the errors. E.g., if there's
an -ENOMEM, it shouldn't continue as if survivability had been
enabled.

One change worth mentioning is that if "wait for lmem" fails, it will
also check the pcode status to decide whether or not it should enter
survivability mode, which it was not doing before. The bit from pcode
for that decision should remain the same after lmem initialization has
failed, so it should be fine.

Cc: Riana Tauro <riana.tauro@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250222001051.3012936-9-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Lucas De Marchi committed Feb 25, 2025
1 parent d41d048 commit d40f275
Showing 5 changed files with 49 additions and 55 deletions.
7 changes: 1 addition & 6 deletions drivers/gpu/drm/xe/xe_device.c
Original file line number Diff line number Diff line change
@@ -53,7 +53,6 @@
#include "xe_pxp.h"
#include "xe_query.h"
#include "xe_sriov.h"
#include "xe_survivability_mode.h"
#include "xe_tile.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
@@ -695,12 +694,8 @@ int xe_device_probe_early(struct xe_device *xe)
update_device_info(xe);

err = xe_pcode_probe_early(xe);
if (err) {
if (xe_survivability_mode_required(xe))
xe_survivability_mode_init(xe);

if (err)
return err;
}

err = wait_for_lmem_ready(xe);
if (err)
2 changes: 1 addition & 1 deletion drivers/gpu/drm/xe/xe_heci_gsc.c
Original file line number Diff line number Diff line change
@@ -201,7 +201,7 @@ void xe_heci_gsc_init(struct xe_device *xe)
return;
}

if (!def->use_polling && !xe_survivability_mode_enabled(xe)) {
if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) {
ret = heci_gsc_irq_setup(xe);
if (ret)
goto fail;
17 changes: 9 additions & 8 deletions drivers/gpu/drm/xe/xe_pci.c
Original file line number Diff line number Diff line change
@@ -770,8 +770,8 @@ static void xe_pci_remove(struct pci_dev *pdev)
if (IS_SRIOV_PF(xe))
xe_pci_sriov_configure(pdev, 0);

if (xe_survivability_mode_enabled(xe))
return xe_survivability_mode_remove(xe);
if (xe_survivability_mode_is_enabled(xe))
return;

xe_device_remove(xe);
xe_pm_runtime_fini(xe);
@@ -846,13 +846,14 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = xe_device_probe_early(xe);

/*
* In Boot Survivability mode, no drm card is exposed
* and driver is loaded with bare minimum to allow
* for firmware to be flashed through mei. Return
* success if survivability mode is enabled.
* In Boot Survivability mode, no drm card is exposed and driver is
* loaded with bare minimum to allow for firmware to be flashed through
* mei. If early probe fails, check if survivability mode is flagged by
* HW to be enabled. In that case enable it and return success.
*/
if (err) {
if (xe_survivability_mode_enabled(xe))
if (xe_survivability_mode_required(xe) &&
xe_survivability_mode_enable(xe))
return 0;

return err;
@@ -946,7 +947,7 @@ static int xe_pci_suspend(struct device *dev)
struct xe_device *xe = pdev_to_xe_device(pdev);
int err;

if (xe_survivability_mode_enabled(xe))
if (xe_survivability_mode_is_enabled(xe))
return -EBUSY;

err = xe_pm_suspend(xe);
73 changes: 36 additions & 37 deletions drivers/gpu/drm/xe/xe_survivability_mode.c
Original file line number Diff line number Diff line change
@@ -127,40 +127,54 @@ static ssize_t survivability_mode_show(struct device *dev,

static DEVICE_ATTR_ADMIN_RO(survivability_mode);

static void enable_survivability_mode(struct pci_dev *pdev)
/*
 * xe_survivability_mode_fini - cleanup callback for survivability mode
 * @arg: opaque callback argument; it is used as the struct xe_device pointer
 *
 * Registered as a devm action on xe->drm.dev by enable_survivability_mode().
 * Removes the survivability_mode sysfs attribute from the PCI device and
 * calls xe_heci_gsc_fini().
 */
static void xe_survivability_mode_fini(void *arg)
{
struct xe_device *xe = arg;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;

/* Drop the attribute created with sysfs_create_file() when enabling. */
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
/* Counterpart of the xe_heci_gsc_init() call made when enabling. */
xe_heci_gsc_fini(xe);
}

static int enable_survivability_mode(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct xe_device *xe = pdev_to_xe_device(pdev);
struct xe_survivability *survivability = &xe->survivability;
int ret = 0;

/* set survivability mode */
survivability->mode = true;
dev_info(dev, "In Survivability Mode\n");

/* create survivability mode sysfs */
ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
if (ret) {
dev_warn(dev, "Failed to create survivability sysfs files\n");
return;
return ret;
}

ret = devm_add_action_or_reset(xe->drm.dev,
xe_survivability_mode_fini, xe);
if (ret)
return ret;

xe_heci_gsc_init(xe);

xe_vsec_init(xe);

survivability->mode = true;
dev_err(dev, "In Survivability Mode\n");

return 0;
}

/**
* xe_survivability_mode_enabled - check if survivability mode is enabled
* xe_survivability_mode_is_enabled - check if survivability mode is enabled
* @xe: xe device instance
*
* Returns true if in survivability mode, false otherwise
*/
bool xe_survivability_mode_enabled(struct xe_device *xe)
bool xe_survivability_mode_is_enabled(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;

return survivability->mode;
return xe->survivability.mode;
}

/**
@@ -183,44 +197,30 @@ bool xe_survivability_mode_required(struct xe_device *xe)
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);

return (survivability->boot_status == NON_CRITICAL_FAILURE ||
survivability->boot_status == CRITICAL_FAILURE);
return survivability->boot_status == NON_CRITICAL_FAILURE ||
survivability->boot_status == CRITICAL_FAILURE;
}

/**
* xe_survivability_mode_remove - remove survivability mode
* xe_survivability_mode_enable - Initialize and enable the survivability mode
* @xe: xe device instance
*
* clean up sysfs entries of survivability mode
*/
void xe_survivability_mode_remove(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;

sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
xe_heci_gsc_fini(xe);
kfree(survivability->info);
}

/**
* xe_survivability_mode_init - Initialize the survivability mode
* @xe: xe device instance
* Initialize survivability information and enable survivability mode
*
* Initializes survivability information and enables survivability mode
* Return: 0 for success, negative error code otherwise.
*/
void xe_survivability_mode_init(struct xe_device *xe)
int xe_survivability_mode_enable(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);

survivability->size = MAX_SCRATCH_MMIO;

info = kcalloc(survivability->size, sizeof(*info), GFP_KERNEL);
info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
GFP_KERNEL);
if (!info)
return;
return -ENOMEM;

survivability->info = info;

@@ -229,9 +229,8 @@ void xe_survivability_mode_init(struct xe_device *xe)
/* Only log debug information and exit if it is a critical failure */
if (survivability->boot_status == CRITICAL_FAILURE) {
log_survivability_info(pdev);
kfree(survivability->info);
return;
return -ENXIO;
}

enable_survivability_mode(pdev);
return enable_survivability_mode(pdev);
}
5 changes: 2 additions & 3 deletions drivers/gpu/drm/xe/xe_survivability_mode.h
Original file line number Diff line number Diff line change
@@ -10,9 +10,8 @@

struct xe_device;

void xe_survivability_mode_init(struct xe_device *xe);
void xe_survivability_mode_remove(struct xe_device *xe);
bool xe_survivability_mode_enabled(struct xe_device *xe);
int xe_survivability_mode_enable(struct xe_device *xe);
bool xe_survivability_mode_is_enabled(struct xe_device *xe);
bool xe_survivability_mode_required(struct xe_device *xe);

#endif /* _XE_SURVIVABILITY_MODE_H_ */

0 comments on commit d40f275

Please sign in to comment.