Skip to content

Commit

Permalink
ASoC: SOF: Re-visit firmware state and panic tracking/handling
Browse files Browse the repository at this point in the history
Merge series from Peter Ujfalusi <peter.ujfalusi@linux.intel.com>:

this series will improve how we are tracking the firmware's state to be able to
avoid communication with it when it is not going to answer due to a panic and
we will attempt to force power cycle the DSP to recover at the next runtime
suspend time.

The state handling brings in other improvements on the way the kernel reports
errors and DSP panics to reduce the printed lines for normal users, but at the
same time allowing developers (or for bug reports) to have more precise
information available to track down the issue.

We can now place messages easily in the correct debug level and not bound to the
static ERROR for some of the print chains, causing excess amount or partial
information to be printed, confusing users and machines (CI).

I would have prefered to split this series up, but it was developed together to
achieve a single goal to reduce the noise, but also provide the details we need
to be able to rootcause issues.
  • Loading branch information
Mark Brown committed Dec 23, 2021
2 parents 8a2d8e4 + 34bfba9 commit 1d194b6
Show file tree
Hide file tree
Showing 21 changed files with 299 additions and 168 deletions.
22 changes: 22 additions & 0 deletions include/sound/sof.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,28 @@

struct snd_sof_dsp_ops;

/**
* enum sof_fw_state - DSP firmware state definitions
* @SOF_FW_BOOT_NOT_STARTED: firmware boot is not yet started
* @SOF_FW_BOOT_PREPARE: preparing for boot (firmware loading for exaqmple)
* @SOF_FW_BOOT_IN_PROGRESS: firmware boot is in progress
* @SOF_FW_BOOT_FAILED: firmware boot failed
* @SOF_FW_BOOT_READY_FAILED: firmware booted but fw_ready op failed
* @SOF_FW_BOOT_READY_OK: firmware booted and fw_ready op passed
* @SOF_FW_BOOT_COMPLETE: firmware is booted up and functional
* @SOF_FW_CRASHED: firmware crashed after successful boot
*/
enum sof_fw_state {
SOF_FW_BOOT_NOT_STARTED = 0,
SOF_FW_BOOT_PREPARE,
SOF_FW_BOOT_IN_PROGRESS,
SOF_FW_BOOT_FAILED,
SOF_FW_BOOT_READY_FAILED,
SOF_FW_BOOT_READY_OK,
SOF_FW_BOOT_COMPLETE,
SOF_FW_CRASHED,
};

/*
* SOF Platform data.
*/
Expand Down
119 changes: 74 additions & 45 deletions sound/soc/sof/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,30 @@
#endif

/* see SOF_DBG_ flags */
int sof_core_debug = IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_ENABLE_FIRMWARE_TRACE);
static int sof_core_debug = IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_ENABLE_FIRMWARE_TRACE);
module_param_named(sof_debug, sof_core_debug, int, 0444);
MODULE_PARM_DESC(sof_debug, "SOF core debug options (0x0 all off)");

/* SOF defaults if not provided by the platform in ms */
#define TIMEOUT_DEFAULT_IPC_MS 500
#define TIMEOUT_DEFAULT_BOOT_MS 2000

/**
* sof_debug_check_flag - check if a given flag(s) is set in sof_core_debug
* @mask: Flag or combination of flags to check
*
* Returns true if all bits set in mask is also set in sof_core_debug, otherwise
* false
*/
bool sof_debug_check_flag(int mask)
{
if ((sof_core_debug & mask) == mask)
return true;

return false;
}
EXPORT_SYMBOL(sof_debug_check_flag);

/*
* FW Panic/fault handling.
*/
Expand All @@ -52,78 +68,89 @@ static const struct sof_panic_msg panic_msg[] = {
{SOF_IPC_PANIC_ASSERT, "assertion failed"},
};

/*
/**
* sof_print_oops_and_stack - Handle the printing of DSP oops and stack trace
* @sdev: Pointer to the device's sdev
* @level: prink log level to use for the printing
* @panic_code: the panic code
* @tracep_code: tracepoint code
* @oops: Pointer to DSP specific oops data
* @panic_info: Pointer to the received panic information message
* @stack: Pointer to the call stack data
* @stack_words: Number of words in the stack data
*
* helper to be called from .dbg_dump callbacks. No error code is
* provided, it's left as an exercise for the caller of .dbg_dump
* (typically IPC or loader)
*/
void snd_sof_get_status(struct snd_sof_dev *sdev, u32 panic_code,
u32 tracep_code, void *oops,
struct sof_ipc_panic_info *panic_info,
void *stack, size_t stack_words)
void sof_print_oops_and_stack(struct snd_sof_dev *sdev, const char *level,
u32 panic_code, u32 tracep_code, void *oops,
struct sof_ipc_panic_info *panic_info,
void *stack, size_t stack_words)
{
u32 code;
int i;

/* is firmware dead ? */
if ((panic_code & SOF_IPC_PANIC_MAGIC_MASK) != SOF_IPC_PANIC_MAGIC) {
dev_err(sdev->dev, "unexpected fault %#010x trace %#010x\n",
panic_code, tracep_code);
dev_printk(level, sdev->dev, "unexpected fault %#010x trace %#010x\n",
panic_code, tracep_code);
return; /* no fault ? */
}

code = panic_code & (SOF_IPC_PANIC_MAGIC_MASK | SOF_IPC_PANIC_CODE_MASK);

for (i = 0; i < ARRAY_SIZE(panic_msg); i++) {
if (panic_msg[i].id == code) {
dev_err(sdev->dev, "reason: %s (%#x)\n", panic_msg[i].msg,
code & SOF_IPC_PANIC_CODE_MASK);
dev_err(sdev->dev, "trace point: %#010x\n", tracep_code);
dev_printk(level, sdev->dev, "reason: %s (%#x)\n",
panic_msg[i].msg, code & SOF_IPC_PANIC_CODE_MASK);
dev_printk(level, sdev->dev, "trace point: %#010x\n", tracep_code);
goto out;
}
}

/* unknown error */
dev_err(sdev->dev, "unknown panic code: %#x\n", code & SOF_IPC_PANIC_CODE_MASK);
dev_err(sdev->dev, "trace point: %#010x\n", tracep_code);
dev_printk(level, sdev->dev, "unknown panic code: %#x\n",
code & SOF_IPC_PANIC_CODE_MASK);
dev_printk(level, sdev->dev, "trace point: %#010x\n", tracep_code);

out:
dev_err(sdev->dev, "panic at %s:%d\n", panic_info->filename,
panic_info->linenum);
sof_oops(sdev, oops);
sof_stack(sdev, oops, stack, stack_words);
dev_printk(level, sdev->dev, "panic at %s:%d\n", panic_info->filename,
panic_info->linenum);
sof_oops(sdev, level, oops);
sof_stack(sdev, level, oops, stack, stack_words);
}
EXPORT_SYMBOL(snd_sof_get_status);
EXPORT_SYMBOL(sof_print_oops_and_stack);

/*
* FW Boot State Transition Diagram
*
* +-----------------------------------------------------------------------+
* | |
* ------------------ ------------------ |
* | | | | |
* | BOOT_FAILED | | READY_FAILED |-------------------------+ |
* | | | | | |
* ------------------ ------------------ | |
* ^ ^ | |
* | | | |
* (FW Boot Timeout) (FW_READY FAIL) | |
* | | | |
* | | | |
* ------------------ | ------------------ | |
* | | | | | | |
* | IN_PROGRESS |---------------+------------->| COMPLETE | | |
* | | (FW Boot OK) (FW_READY OK) | | | |
* ------------------ ------------------ | |
* ^ | | |
* | | | |
* (FW Loading OK) (System Suspend/Runtime Suspend)
* | | | |
* | | | |
* ------------------ ------------------ | | |
* | | | |<-----+ | |
* | PREPARE | | NOT_STARTED |<---------------------+ |
* | | | |<---------------------------+
* +----------------------------------------------------------------------+
* | |
* ------------------ ------------------ |
* | | | | |
* | BOOT_FAILED |<-------| READY_FAILED | |
* | |<--+ | | ------------------ |
* ------------------ | ------------------ | | |
* ^ | ^ | CRASHED |---+ |
* | | | | | | |
* (FW Boot Timeout) | (FW_READY FAIL) ------------------ | |
* | | | ^ | |
* | | | |(DSP Panic) | |
* ------------------ | | ------------------ | |
* | | | | | | | |
* | IN_PROGRESS |---------------+------------->| COMPLETE | | |
* | | (FW Boot OK) (FW_READY OK) | | | |
* ------------------ | ------------------ | |
* ^ | | | |
* | | | | |
* (FW Loading OK) | (System Suspend/Runtime Suspend)
* | | | | |
* | (FW Loading Fail) | | |
* ------------------ | ------------------ | | |
* | | | | |<-----+ | |
* | PREPARE |---+ | NOT_STARTED |<---------------------+ |
* | | | |<--------------------------+
* ------------------ ------------------
* | ^ | ^
* | | | |
Expand Down Expand Up @@ -186,6 +213,7 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
if (ret < 0) {
dev_err(sdev->dev, "error: failed to load DSP firmware %d\n",
ret);
sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED);
goto fw_load_err;
}

Expand All @@ -199,10 +227,11 @@ static int sof_probe_continue(struct snd_sof_dev *sdev)
if (ret < 0) {
dev_err(sdev->dev, "error: failed to boot DSP firmware %d\n",
ret);
sof_set_fw_state(sdev, SOF_FW_BOOT_FAILED);
goto fw_run_err;
}

if (sof_core_debug & SOF_DBG_ENABLE_TRACE) {
if (sof_debug_check_flag(SOF_DBG_ENABLE_TRACE)) {
sdev->dtrace_is_supported = true;

/* init DMA trace */
Expand Down
35 changes: 23 additions & 12 deletions sound/soc/sof/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -930,45 +930,55 @@ void snd_sof_free_debug(struct snd_sof_dev *sdev)
EXPORT_SYMBOL_GPL(snd_sof_free_debug);

static const struct soc_fw_state_info {
enum snd_sof_fw_state state;
enum sof_fw_state state;
const char *name;
} fw_state_dbg[] = {
{SOF_FW_BOOT_NOT_STARTED, "SOF_FW_BOOT_NOT_STARTED"},
{SOF_FW_BOOT_PREPARE, "SOF_FW_BOOT_PREPARE"},
{SOF_FW_BOOT_IN_PROGRESS, "SOF_FW_BOOT_IN_PROGRESS"},
{SOF_FW_BOOT_FAILED, "SOF_FW_BOOT_FAILED"},
{SOF_FW_BOOT_READY_FAILED, "SOF_FW_BOOT_READY_FAILED"},
{SOF_FW_BOOT_READY_OK, "SOF_FW_BOOT_READY_OK"},
{SOF_FW_BOOT_COMPLETE, "SOF_FW_BOOT_COMPLETE"},
{SOF_FW_CRASHED, "SOF_FW_CRASHED"},
};

static void snd_sof_dbg_print_fw_state(struct snd_sof_dev *sdev)
static void snd_sof_dbg_print_fw_state(struct snd_sof_dev *sdev, const char *level)
{
int i;

for (i = 0; i < ARRAY_SIZE(fw_state_dbg); i++) {
if (sdev->fw_state == fw_state_dbg[i].state) {
dev_err(sdev->dev, "fw_state: %s (%d)\n", fw_state_dbg[i].name, i);
dev_printk(level, sdev->dev, "fw_state: %s (%d)\n",
fw_state_dbg[i].name, i);
return;
}
}

dev_err(sdev->dev, "fw_state: UNKNOWN (%d)\n", sdev->fw_state);
dev_printk(level, sdev->dev, "fw_state: UNKNOWN (%d)\n", sdev->fw_state);
}

void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, u32 flags)
void snd_sof_dsp_dbg_dump(struct snd_sof_dev *sdev, const char *msg, u32 flags)
{
bool print_all = !!(sof_core_debug & SOF_DBG_PRINT_ALL_DUMPS);
char *level = flags & SOF_DBG_DUMP_OPTIONAL ? KERN_DEBUG : KERN_ERR;
bool print_all = sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS);

if (flags & SOF_DBG_DUMP_OPTIONAL && !print_all)
return;

if (sof_ops(sdev)->dbg_dump && !sdev->dbg_dump_printed) {
dev_err(sdev->dev, "------------[ DSP dump start ]------------\n");
snd_sof_dbg_print_fw_state(sdev);
dev_printk(level, sdev->dev,
"------------[ DSP dump start ]------------\n");
if (msg)
dev_printk(level, sdev->dev, "%s\n", msg);
snd_sof_dbg_print_fw_state(sdev, level);
sof_ops(sdev)->dbg_dump(sdev, flags);
dev_err(sdev->dev, "------------[ DSP dump end ]------------\n");
dev_printk(level, sdev->dev,
"------------[ DSP dump end ]------------\n");
if (!print_all)
sdev->dbg_dump_printed = true;
} else if (msg) {
dev_printk(level, sdev->dev, "%s\n", msg);
}
}
EXPORT_SYMBOL(snd_sof_dsp_dbg_dump);
Expand All @@ -979,15 +989,15 @@ static void snd_sof_ipc_dump(struct snd_sof_dev *sdev)
dev_err(sdev->dev, "------------[ IPC dump start ]------------\n");
sof_ops(sdev)->ipc_dump(sdev);
dev_err(sdev->dev, "------------[ IPC dump end ]------------\n");
if (!(sof_core_debug & SOF_DBG_PRINT_ALL_DUMPS))
if (!sof_debug_check_flag(SOF_DBG_PRINT_ALL_DUMPS))
sdev->ipc_dump_printed = true;
}
}

void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev)
{
if (IS_ENABLED(CONFIG_SND_SOC_SOF_DEBUG_RETAIN_DSP_CONTEXT) ||
(sof_core_debug & SOF_DBG_RETAIN_CTX)) {
sof_debug_check_flag(SOF_DBG_RETAIN_CTX)) {
/* should we prevent DSP entering D3 ? */
if (!sdev->ipc_dump_printed)
dev_info(sdev->dev,
Expand All @@ -997,7 +1007,8 @@ void snd_sof_handle_fw_exception(struct snd_sof_dev *sdev)

/* dump vital information to the logs */
snd_sof_ipc_dump(sdev);
snd_sof_dsp_dbg_dump(sdev, SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX);
snd_sof_dsp_dbg_dump(sdev, "Firmware exception",
SOF_DBG_DUMP_REGS | SOF_DBG_DUMP_MBOX);
snd_sof_trace_notify_for_error(sdev);
}
EXPORT_SYMBOL(snd_sof_handle_fw_exception);
4 changes: 2 additions & 2 deletions sound/soc/sof/imx/imx-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,8 @@ void imx8_dump(struct snd_sof_dev *sdev, u32 flags)
IMX8_STACK_DUMP_SIZE);

/* Print the information to the console */
snd_sof_get_status(sdev, status, status, &xoops, &panic_info, stack,
IMX8_STACK_DUMP_SIZE);
sof_print_oops_and_stack(sdev, KERN_ERR, status, status, &xoops,
&panic_info, stack, IMX8_STACK_DUMP_SIZE);
}
EXPORT_SYMBOL(imx8_dump);

Expand Down
2 changes: 1 addition & 1 deletion sound/soc/sof/imx/imx8.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ static void imx8_dsp_handle_request(struct imx_dsp_ipc *ipc)

/* Check to see if the message is a panic code (0x0dead***) */
if ((p & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC)
snd_sof_dsp_panic(priv->sdev, p);
snd_sof_dsp_panic(priv->sdev, p, true);
else
snd_sof_ipc_msgs_rx(priv->sdev);
}
Expand Down
2 changes: 1 addition & 1 deletion sound/soc/sof/imx/imx8m.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static void imx8m_dsp_handle_request(struct imx_dsp_ipc *ipc)

/* Check to see if the message is a panic code (0x0dead***) */
if ((p & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC)
snd_sof_dsp_panic(priv->sdev, p);
snd_sof_dsp_panic(priv->sdev, p, true);
else
snd_sof_ipc_msgs_rx(priv->sdev);
}
Expand Down
8 changes: 4 additions & 4 deletions sound/soc/sof/intel/atom.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ void atom_dump(struct snd_sof_dev *sdev, u32 flags)
panic = snd_sof_dsp_read64(sdev, DSP_BAR, SHIM_IPCX);
atom_get_registers(sdev, &xoops, &panic_info, stack,
STACK_DUMP_SIZE);
snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, stack,
STACK_DUMP_SIZE);
sof_print_oops_and_stack(sdev, KERN_ERR, status, panic, &xoops,
&panic_info, stack, STACK_DUMP_SIZE);

/* provide some context for firmware debug */
imrx = snd_sof_dsp_read64(sdev, DSP_BAR, SHIM_IMRX);
Expand Down Expand Up @@ -165,8 +165,8 @@ irqreturn_t atom_irq_thread(int irq, void *context)

/* Handle messages from DSP Core */
if ((ipcd & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) {
snd_sof_dsp_panic(sdev, PANIC_OFFSET(ipcd) +
MBOX_OFFSET);
snd_sof_dsp_panic(sdev, PANIC_OFFSET(ipcd) + MBOX_OFFSET,
true);
} else {
snd_sof_ipc_msgs_rx(sdev);
}
Expand Down
8 changes: 4 additions & 4 deletions sound/soc/sof/intel/bdw.c
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,8 @@ static void bdw_dump(struct snd_sof_dev *sdev, u32 flags)
panic = snd_sof_dsp_read(sdev, BDW_DSP_BAR, SHIM_IPCX);
bdw_get_registers(sdev, &xoops, &panic_info, stack,
BDW_STACK_DUMP_SIZE);
snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, stack,
BDW_STACK_DUMP_SIZE);
sof_print_oops_and_stack(sdev, KERN_ERR, status, panic, &xoops,
&panic_info, stack, BDW_STACK_DUMP_SIZE);

/* provide some context for firmware debug */
imrx = snd_sof_dsp_read(sdev, BDW_DSP_BAR, SHIM_IMRX);
Expand Down Expand Up @@ -344,8 +344,8 @@ static irqreturn_t bdw_irq_thread(int irq, void *context)

/* Handle messages from DSP Core */
if ((ipcd & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) {
snd_sof_dsp_panic(sdev, BDW_PANIC_OFFSET(ipcx) +
MBOX_OFFSET);
snd_sof_dsp_panic(sdev, BDW_PANIC_OFFSET(ipcx) + MBOX_OFFSET,
true);
} else {
snd_sof_ipc_msgs_rx(sdev);
}
Expand Down
Loading

0 comments on commit 1d194b6

Please sign in to comment.