Skip to content

Commit

Permalink
mmc: core: Fix some driver hangs when dealing with broken devices
Browse files Browse the repository at this point in the history
There are infinite loops in the mmc code that can be caused by bad
hardware.  The code will loop forever if the device never comes back
from program mode, R1_STATE_PRG, and it is not ready for data,
R1_READY_FOR_DATA.

A long timeout is added to prevent the code from looping forever.
The timeout will occur if the device never comes back from program
state or the device never becomes ready for data.

It's not clear whether the timeout will do more than log a pr_err()
and then start a fresh hang all over again.  We may need to extend
this patch later to perform some kind of reset of the device (is
that possible?) or rejection of new I/O to the device.

Signed-off-by: Trey Ramsay <tramsay@linux.vnet.ibm.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
  • Loading branch information
Trey Ramsay authored and Chris Ball committed Dec 6, 2012
1 parent e95baf1 commit 8fee476
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 1 deletion.
15 changes: 15 additions & 0 deletions drivers/mmc/card/block.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ MODULE_ALIAS("mmc:block");
#define INAND_CMD38_ARG_SECERASE 0x80
#define INAND_CMD38_ARG_SECTRIM1 0x81
#define INAND_CMD38_ARG_SECTRIM2 0x88
#define MMC_BLK_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */

static DEFINE_MUTEX(block_mutex);

Expand Down Expand Up @@ -1034,13 +1035,27 @@ static int mmc_blk_err_check(struct mmc_card *card,
*/
if (!mmc_host_is_spi(card->host) && rq_data_dir(req) != READ) {
u32 status;
unsigned long timeout;

timeout = jiffies + msecs_to_jiffies(MMC_BLK_TIMEOUT_MS);
do {
int err = get_card_status(card, &status, 5);
if (err) {
pr_err("%s: error %d requesting status\n",
req->rq_disk->disk_name, err);
return MMC_BLK_CMD_ERR;
}

/* Timeout if the device never becomes ready for data
* and never leaves the program state.
*/
if (time_after(jiffies, timeout)) {
pr_err("%s: Card stuck in programming state!"\
" %s %s\n", mmc_hostname(card->host),
req->rq_disk->disk_name, __func__);

return MMC_BLK_CMD_ERR;
}
/*
* Some cards mishandle the status bits,
* so make sure to check both the busy
Expand Down
18 changes: 17 additions & 1 deletion drivers/mmc/core/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
#include "sd_ops.h"
#include "sdio_ops.h"

/* If the device is not responding */
#define MMC_CORE_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */

/*
* Background operations can take a long time, depending on the housekeeping
* operations the card has to perform.
Expand Down Expand Up @@ -1631,6 +1634,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
{
struct mmc_command cmd = {0};
unsigned int qty = 0;
unsigned long timeout;
int err;

/*
Expand Down Expand Up @@ -1708,6 +1712,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
if (mmc_host_is_spi(card->host))
goto out;

timeout = jiffies + msecs_to_jiffies(MMC_CORE_TIMEOUT_MS);
do {
memset(&cmd, 0, sizeof(struct mmc_command));
cmd.opcode = MMC_SEND_STATUS;
Expand All @@ -1721,8 +1726,19 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from,
err = -EIO;
goto out;
}

/* Timeout if the device never becomes ready for data and
* never leaves the program state.
*/
if (time_after(jiffies, timeout)) {
pr_err("%s: Card stuck in programming state! %s\n",
mmc_hostname(card->host), __func__);
err = -EIO;
goto out;
}

} while (!(cmd.resp[0] & R1_READY_FOR_DATA) ||
R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG);
(R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG));
out:
return err;
}
Expand Down
11 changes: 11 additions & 0 deletions drivers/mmc/core/mmc_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include "core.h"
#include "mmc_ops.h"

#define MMC_OPS_TIMEOUT_MS (10 * 60 * 1000) /* 10 minute timeout */

static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card)
{
int err;
Expand Down Expand Up @@ -409,6 +411,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
{
int err;
struct mmc_command cmd = {0};
unsigned long timeout;
u32 status;

BUG_ON(!card);
Expand Down Expand Up @@ -437,6 +440,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
return 0;

/* Must check status to be sure of no errors */
timeout = jiffies + msecs_to_jiffies(MMC_OPS_TIMEOUT_MS);
do {
err = mmc_send_status(card, &status);
if (err)
Expand All @@ -445,6 +449,13 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
break;
if (mmc_host_is_spi(card->host))
break;

/* Timeout if the device never leaves the program state. */
if (time_after(jiffies, timeout)) {
pr_err("%s: Card stuck in programming state! %s\n",
mmc_hostname(card->host), __func__);
return -ETIMEDOUT;
}
} while (R1_CURRENT_STATE(status) == R1_STATE_PRG);

if (mmc_host_is_spi(card->host)) {
Expand Down

0 comments on commit 8fee476

Please sign in to comment.