Skip to content

Commit

Permalink
mhi: pci_generic: Add health-check
Browse files Browse the repository at this point in the history
If the modem crashes for any reason, we may not be able to detect
it at the MHI level (the MHI registers are no longer reachable).

This patch implements a health-check mechanism that regularly checks
that the device is alive (i.e. the MHI layer can communicate with it).
If the device is not alive (because of a crash or an unexpected reset),
the recovery procedure is triggered.

Tested successfully with Telit FN980m module.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Hemant Kumar <hemantk@codeaurora.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
  • Loading branch information
Loic Poulain authored and Manivannan Sadhasivam committed Jan 4, 2021
1 parent b012ee6 commit 8562d4f
Showing 1 changed file with 35 additions and 0 deletions.
35 changes: 35 additions & 0 deletions drivers/bus/mhi/pci_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@
#include <linux/mhi.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/timer.h>
#include <linux/workqueue.h>

#define MHI_PCI_DEFAULT_BAR_NUM 0

#define MHI_POST_RESET_DELAY_MS 500

#define HEALTH_CHECK_PERIOD (HZ * 2)

/**
* struct mhi_pci_dev_info - MHI PCI device specific information
* @config: MHI controller configuration
Expand Down Expand Up @@ -189,6 +193,7 @@ struct mhi_pci_device {
struct mhi_controller mhi_cntrl;
struct pci_saved_state *pci_state;
struct work_struct recovery_work;
struct timer_list health_check_timer;
unsigned long status;
};

Expand Down Expand Up @@ -326,6 +331,8 @@ static void mhi_pci_recovery_work(struct work_struct *work)

dev_warn(&pdev->dev, "device recovery started\n");

del_timer(&mhi_pdev->health_check_timer);

/* Clean up MHI state */
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, false);
Expand All @@ -351,6 +358,7 @@ static void mhi_pci_recovery_work(struct work_struct *work)
dev_dbg(&pdev->dev, "Recovery completed\n");

set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
return;

err_unprepare:
Expand All @@ -360,6 +368,21 @@ static void mhi_pci_recovery_work(struct work_struct *work)
dev_err(&pdev->dev, "Recovery failed\n");
}

/*
 * health_check() - periodic liveness probe, run as the health_check_timer
 * callback.
 * @t: the expired timer, embedded in a struct mhi_pci_device
 *
 * Asks mhi_pci_is_alive() whether the device still responds. When it does,
 * the timer is re-armed for another HEALTH_CHECK_PERIOD. When it does not,
 * the failure is logged and recovery_work is queued on system_long_wq; the
 * timer is not re-armed on that path.
 */
static void health_check(struct timer_list *t)
{
	struct mhi_pci_device *mdev = from_timer(mdev, t, health_check_timer);

	if (mhi_pci_is_alive(&mdev->mhi_cntrl)) {
		/* Device answered: check again in two seconds */
		mod_timer(&mdev->health_check_timer,
			  jiffies + HEALTH_CHECK_PERIOD);
		return;
	}

	dev_err(mdev->mhi_cntrl.cntrl_dev, "Device died\n");
	queue_work(system_long_wq, &mdev->recovery_work);
}

static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
Expand All @@ -376,6 +399,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;

INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
timer_setup(&mhi_pdev->health_check_timer, health_check, 0);

mhi_cntrl_config = info->config;
mhi_cntrl = &mhi_pdev->mhi_cntrl;
Expand Down Expand Up @@ -427,6 +451,9 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)

set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);

/* start health check */
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);

return 0;

err_unprepare:
Expand All @@ -442,6 +469,7 @@ static void mhi_pci_remove(struct pci_dev *pdev)
struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;

del_timer(&mhi_pdev->health_check_timer);
cancel_work_sync(&mhi_pdev->recovery_work);

if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
Expand All @@ -459,6 +487,8 @@ static void mhi_pci_reset_prepare(struct pci_dev *pdev)

dev_info(&pdev->dev, "reset\n");

del_timer(&mhi_pdev->health_check_timer);

/* Clean up MHI state */
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, false);
Expand Down Expand Up @@ -502,6 +532,7 @@ static void mhi_pci_reset_done(struct pci_dev *pdev)
}

set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
}

static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
Expand Down Expand Up @@ -562,6 +593,7 @@ static int __maybe_unused mhi_pci_suspend(struct device *dev)
struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;

del_timer(&mhi_pdev->health_check_timer);
cancel_work_sync(&mhi_pdev->recovery_work);

/* Transition to M3 state */
Expand Down Expand Up @@ -597,6 +629,9 @@ static int __maybe_unused mhi_pci_resume(struct device *dev)
goto err_recovery;
}

/* Resume health check */
mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);

return 0;

err_recovery:
Expand Down

0 comments on commit 8562d4f

Please sign in to comment.