Skip to content

Commit

Permalink
Adding in EEH support to the IBM FlashSystem 70/80 device driver
Browse files Browse the repository at this point in the history
Changes in v2 include:
o Fixed spelling of guarantee.
o Fixed potential memory leak if slot reset fails out.
o Replaced list_for_each_entry_safe with list_for_each_entry.

Signed-off-by: Philip J Kelleher <pjk1939@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
Philip J Kelleher authored and Jens Axboe committed Mar 16, 2013
1 parent 1ebfd10 commit c95246c
Show file tree
Hide file tree
Showing 4 changed files with 436 additions and 67 deletions.
203 changes: 201 additions & 2 deletions drivers/block/rsxx/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/delay.h>

#include <linux/genhd.h>
#include <linux/idr.h>
Expand All @@ -52,6 +53,13 @@ static DEFINE_IDA(rsxx_disk_ida);
static DEFINE_SPINLOCK(rsxx_ida_lock);

/*----------------- Interrupt Control & Handling -------------------*/

/*
 * Zero the driver's cached interrupt masks.
 *
 * Only the in-memory isr_mask and ier_mask fields of @card are cleared;
 * no hardware register is written here.
 */
static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
{
	card->ier_mask = 0;
	card->isr_mask = 0;
}

static void __enable_intr(unsigned int *mask, unsigned int intr)
{
*mask |= intr;
Expand All @@ -71,7 +79,8 @@ static void __disable_intr(unsigned int *mask, unsigned int intr)
*/
void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
if (unlikely(card->halt))
if (unlikely(card->halt) ||
unlikely(card->eeh_state))
return;

__enable_intr(&card->ier_mask, intr);
Expand All @@ -80,14 +89,18 @@ void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr)

/*
 * Disable interrupt source(s) @intr in the card's IER.
 *
 * The cached ier_mask is updated through __disable_intr() and the result is
 * written to the IER register. While card->eeh_state is set the call is a
 * no-op: neither the cached mask nor the register is touched.
 */
void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr)
{
	if (likely(!card->eeh_state)) {
		__disable_intr(&card->ier_mask, intr);
		iowrite32(card->ier_mask, card->regmap + IER);
	}
}

void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
if (unlikely(card->halt))
if (unlikely(card->halt) ||
unlikely(card->eeh_state))
return;

__enable_intr(&card->isr_mask, intr);
Expand All @@ -97,6 +110,9 @@ void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card,
void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card,
unsigned int intr)
{
if (unlikely(card->eeh_state))
return;

__disable_intr(&card->isr_mask, intr);
__disable_intr(&card->ier_mask, intr);
iowrite32(card->ier_mask, card->regmap + IER);
Expand All @@ -115,6 +131,9 @@ static irqreturn_t rsxx_isr(int irq, void *pdata)
do {
reread_isr = 0;

if (unlikely(card->eeh_state))
break;

isr = ioread32(card->regmap + ISR);
if (isr == 0xffffffff) {
/*
Expand Down Expand Up @@ -304,6 +323,179 @@ static int card_shutdown(struct rsxx_cardinfo *card)
return 0;
}

/*
 * Prepare the card for a slot reset.
 *
 * Called from rsxx_error_detected() when the reported channel state is not a
 * permanent failure. Marks the card as being in EEH recovery, clears the
 * cached interrupt masks, disables the PCI device, saves the issued DMA and
 * creg state, and releases every target's status and command buffers.
 */
static void rsxx_eeh_frozen(struct pci_dev *dev)
{
	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
	int i;

	dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");

	card->eeh_state = 1;
	rsxx_mask_interrupts(card);

	/*
	 * We need to guarantee that the write for eeh_state and masking
	 * interrupts does not become reordered. This will prevent a possible
	 * race condition with the EEH code.
	 */
	wmb();

	pci_disable_device(dev);

	rsxx_eeh_save_issued_dmas(card);

	rsxx_eeh_save_issued_creg(card);

	for (i = 0; i < card->n_targets; i++) {
		/*
		 * Clear each pointer once its buffer is released. The error
		 * path in rsxx_slot_reset() uses a non-NULL .buf as the
		 * "allocated" test, so a stale pointer left here could be
		 * freed a second time.
		 */
		if (card->ctrl[i].status.buf) {
			pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8,
					    card->ctrl[i].status.buf,
					    card->ctrl[i].status.dma_addr);
			card->ctrl[i].status.buf = NULL;
		}
		if (card->ctrl[i].cmd.buf) {
			pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8,
					    card->ctrl[i].cmd.buf,
					    card->ctrl[i].cmd.dma_addr);
			card->ctrl[i].cmd.buf = NULL;
		}
	}
}

/*
 * Take a failed card out of service.
 *
 * Used for a permanent channel failure and as the final step of a failed
 * slot reset. Flags the card as being in EEH state, stops the activity
 * timer of every target, and cancels the card's DMAs.
 */
static void rsxx_eeh_failure(struct pci_dev *dev)
{
	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
	int target;

	dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");

	card->eeh_state = 1;

	/* Synchronously delete each target's activity timer. */
	for (target = 0; target < card->n_targets; target++) {
		del_timer_sync(&card->ctrl[target].activity_timer);
	}

	rsxx_eeh_cancel_dmas(card);
}

/*
 * Poll PCI_RECONFIG until the hardware stops reporting RSXX_FLUSH_BUSY.
 *
 * The register is read at most 10 times, sleeping one second after every
 * read that still shows the busy bit. If the busy bit is clear but
 * RSXX_FLUSH_TIMEOUT is set, a warning is logged and the poll still counts
 * as complete.
 *
 * Returns 0 once the busy bit is observed clear, or -1 if it is still set
 * after the last attempt.
 */
static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
{
	unsigned int reconfig;
	int attempt;

	/* We need to wait for the hardware to reset */
	for (attempt = 0; attempt < 10; attempt++) {
		reconfig = ioread32(card->regmap + PCI_RECONFIG);

		if (!(reconfig & RSXX_FLUSH_BUSY)) {
			if (reconfig & RSXX_FLUSH_TIMEOUT) {
				dev_warn(CARD_TO_DEV(card),
					 "HW: flash controller timeout\n");
			}
			return 0;
		}

		ssleep(1);
	}

	/* Hardware failed resetting itself. */
	return -1;
}

static pci_ers_result_t rsxx_error_detected(struct pci_dev *dev,
enum pci_channel_state error)
{
if (dev->revision < RSXX_EEH_SUPPORT)
return PCI_ERS_RESULT_NONE;

if (error == pci_channel_io_perm_failure) {
rsxx_eeh_failure(dev);
return PCI_ERS_RESULT_DISCONNECT;
}

rsxx_eeh_frozen(dev);
return PCI_ERS_RESULT_NEED_RESET;
}

/*
 * PCI slot_reset callback: bring the card back up after a slot reset.
 *
 * Re-enables the PCI device and bus mastering, waits for the hardware flush
 * to finish, resets the DMA queues, re-creates every target's hardware
 * buffers, re-applies the DMA configuration when one is valid, clears
 * eeh_state, remaps the saved DMAs, re-enables interrupts and restarts the
 * creg and DMA issue queues.
 *
 * Returns PCI_ERS_RESULT_RECOVERED on success. On any failure the card is
 * disabled through rsxx_eeh_failure() and PCI_ERS_RESULT_DISCONNECT is
 * returned.
 */
static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev)
{
	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
	unsigned long flags;
	int i;
	int st;

	dev_warn(&dev->dev,
		"IBM FlashSystem PCI: recovering from slot reset.\n");

	st = pci_enable_device(dev);
	if (st)
		goto failed_hw_setup;

	pci_set_master(dev);

	st = rsxx_eeh_fifo_flush_poll(card);
	if (st)
		goto failed_hw_setup;

	rsxx_dma_queue_reset(card);

	for (i = 0; i < card->n_targets; i++) {
		st = rsxx_hw_buffers_init(dev, &card->ctrl[i]);
		if (st)
			goto failed_hw_buffers_init;
	}

	if (card->config_valid)
		rsxx_dma_configure(card);

	/*
	 * Clears the ISR register from spurious interrupts; the value read
	 * is intentionally discarded.
	 */
	ioread32(card->regmap + ISR);

	/*
	 * eeh_state must be cleared before interrupts are re-enabled below:
	 * rsxx_enable_ier_and_isr() returns early while it is set.
	 */
	card->eeh_state = 0;

	st = rsxx_eeh_remap_dmas(card);
	if (st)
		goto failed_remap_dmas;

	spin_lock_irqsave(&card->irq_lock, flags);
	/*
	 * NOTE(review): this is a bitwise AND against RSXX_MAX_TARGETS, not a
	 * comparison -- confirm that is the intended test for choosing
	 * between the _G and _C interrupt sets.
	 */
	if (card->n_targets & RSXX_MAX_TARGETS)
		rsxx_enable_ier_and_isr(card, CR_INTR_ALL_G);
	else
		rsxx_enable_ier_and_isr(card, CR_INTR_ALL_C);
	spin_unlock_irqrestore(&card->irq_lock, flags);

	rsxx_kick_creg_queue(card);

	/* Restart DMA issue work only for targets with queued requests. */
	for (i = 0; i < card->n_targets; i++) {
		spin_lock(&card->ctrl[i].queue_lock);
		if (list_empty(&card->ctrl[i].queue)) {
			spin_unlock(&card->ctrl[i].queue_lock);
			continue;
		}
		spin_unlock(&card->ctrl[i].queue_lock);

		queue_work(card->ctrl[i].issue_wq,
			   &card->ctrl[i].issue_dma_work);
	}

	dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");

	return PCI_ERS_RESULT_RECOVERED;

failed_hw_buffers_init:
failed_remap_dmas:
	for (i = 0; i < card->n_targets; i++) {
		/*
		 * Clear each pointer after freeing so that any later
		 * NULL-guarded release of these buffers cannot free them a
		 * second time.
		 */
		if (card->ctrl[i].status.buf) {
			pci_free_consistent(card->dev,
					    STATUS_BUFFER_SIZE8,
					    card->ctrl[i].status.buf,
					    card->ctrl[i].status.dma_addr);
			card->ctrl[i].status.buf = NULL;
		}
		if (card->ctrl[i].cmd.buf) {
			pci_free_consistent(card->dev,
					    COMMAND_BUFFER_SIZE8,
					    card->ctrl[i].cmd.buf,
					    card->ctrl[i].cmd.dma_addr);
			card->ctrl[i].cmd.buf = NULL;
		}
	}
failed_hw_setup:
	rsxx_eeh_failure(dev);
	return PCI_ERS_RESULT_DISCONNECT;
}

/*----------------- Driver Initialization & Setup -------------------*/
/* Returns: 0 if the driver is compatible with the device
-1 if the driver is NOT compatible with the device */
Expand Down Expand Up @@ -383,6 +575,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,

spin_lock_init(&card->irq_lock);
card->halt = 0;
card->eeh_state = 0;

spin_lock_irq(&card->irq_lock);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
Expand Down Expand Up @@ -593,6 +786,11 @@ static void rsxx_pci_shutdown(struct pci_dev *dev)
card_shutdown(card);
}

/*
 * PCI error-recovery callbacks for this driver; referenced from
 * rsxx_pci_driver through its .err_handler field.
 */
static const struct pci_error_handlers rsxx_err_handler = {
/* Chooses between disabling the card and requesting a slot reset. */
.error_detected = rsxx_error_detected,
/* Re-initializes the card once the slot has been reset. */
.slot_reset = rsxx_slot_reset,
};

static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = {
{PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS70_FLASH)},
{PCI_DEVICE(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_FS80_FLASH)},
Expand All @@ -608,6 +806,7 @@ static struct pci_driver rsxx_pci_driver = {
.remove = rsxx_pci_remove,
.suspend = rsxx_pci_suspend,
.shutdown = rsxx_pci_shutdown,
.err_handler = &rsxx_err_handler,
};

static int __init rsxx_core_init(void)
Expand Down
Loading

0 comments on commit c95246c

Please sign in to comment.