Skip to content

Commit

Permalink
cxlflash: Resolve oops in wait_port_offline
Browse files Browse the repository at this point in the history
If an async error interrupt is generated, and the error requires the FC
link to be reset, the reset cannot be performed in interrupt context. So a
work element is scheduled to complete the link reset in a process
context. If either an EEH event or an escalation occurs in between when
the interrupt is generated and the scheduled work is started, the MMIO
space may no longer be available. This will cause an oops in the worker
thread.

[  606.806583] NIP kthread_data+0x28/0x40
[  606.806633] LR wq_worker_sleeping+0x30/0x100
[  606.806694] Call Trace:
[  606.806721] 0x50 (unreliable)
[  606.806796] wq_worker_sleeping+0x30/0x100
[  606.806884] __schedule+0x69c/0x8a0
[  606.806959] schedule+0x44/0xc0
[  606.807034] do_exit+0x770/0xb90
[  606.807109] die+0x300/0x460
[  606.807185] bad_page_fault+0xd8/0x150
[  606.807259] handle_page_fault+0x2c/0x30
[  606.807338] wait_port_offline.constprop.12+0x60/0x130 [cxlflash]

To prevent the problem space area from being unmapped while work is
pending, a mapcount (using the kref mechanism) is held.  The mapcount
is released only when the work is completed.  Releasing the last
reference invokes the unmapping service.

Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Acked-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Reviewed-by: Uma Krishnan <ukrishn@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
  • Loading branch information
Manoj Kumar authored and Martin K. Petersen committed Jan 7, 2016
1 parent ee91e33 commit b45cdba
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 3 deletions.
2 changes: 2 additions & 0 deletions drivers/scsi/cxlflash/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ struct afu {
struct sisl_host_map __iomem *host_map; /* MC host map */
struct sisl_ctrl_map __iomem *ctrl_map; /* MC control map */

struct kref mapcount;

ctx_hndl_t ctx_hndl; /* master's context handle */
u64 *hrrq_start;
u64 *hrrq_end;
Expand Down
27 changes: 24 additions & 3 deletions drivers/scsi/cxlflash/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ static int send_cmd(struct afu *afu, struct afu_cmd *cmd)

no_room:
afu->read_room = true;
kref_get(&cfg->afu->mapcount);
schedule_work(&cfg->work_q);
rc = SCSI_MLQUEUE_HOST_BUSY;
goto out;
Expand Down Expand Up @@ -473,6 +474,16 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
return rc;
}

static void afu_unmap(struct kref *ref)
{
struct afu *afu = container_of(ref, struct afu, mapcount);

if (likely(afu->afu_map)) {
cxl_psa_unmap((void __iomem *)afu->afu_map);
afu->afu_map = NULL;
}
}

/**
* cxlflash_driver_info() - information handler for this host driver
* @host: SCSI host associated with device.
Expand Down Expand Up @@ -503,6 +514,7 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
ulong lock_flags;
short lflag = 0;
int rc = 0;
int kref_got = 0;

dev_dbg_ratelimited(dev, "%s: (scp=%p) %d/%d/%d/%llu "
"cdb=(%08X-%08X-%08X-%08X)\n",
Expand Down Expand Up @@ -547,6 +559,9 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
goto out;
}

kref_get(&cfg->afu->mapcount);
kref_got = 1;

cmd->rcb.ctx_id = afu->ctx_hndl;
cmd->rcb.port_sel = port_sel;
cmd->rcb.lun_id = lun_to_lunid(scp->device->lun);
Expand Down Expand Up @@ -587,6 +602,8 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
}

out:
if (kref_got)
kref_put(&afu->mapcount, afu_unmap);
pr_devel("%s: returning rc=%d\n", __func__, rc);
return rc;
}
Expand Down Expand Up @@ -661,6 +678,7 @@ static void stop_afu(struct cxlflash_cfg *cfg)
cxl_psa_unmap((void __iomem *)afu->afu_map);
afu->afu_map = NULL;
}
kref_put(&afu->mapcount, afu_unmap);
}
}

Expand Down Expand Up @@ -746,8 +764,8 @@ static void cxlflash_remove(struct pci_dev *pdev)
scsi_remove_host(cfg->host);
/* fall through */
case INIT_STATE_AFU:
term_afu(cfg);
cancel_work_sync(&cfg->work_q);
term_afu(cfg);
case INIT_STATE_PCI:
pci_release_regions(cfg->dev);
pci_disable_device(pdev);
Expand Down Expand Up @@ -1331,6 +1349,7 @@ static irqreturn_t cxlflash_async_err_irq(int irq, void *data)
__func__, port);
cfg->lr_state = LINK_RESET_REQUIRED;
cfg->lr_port = port;
kref_get(&cfg->afu->mapcount);
schedule_work(&cfg->work_q);
}

Expand All @@ -1351,6 +1370,7 @@ static irqreturn_t cxlflash_async_err_irq(int irq, void *data)

if (info->action & SCAN_HOST) {
atomic_inc(&cfg->scan_host_needed);
kref_get(&cfg->afu->mapcount);
schedule_work(&cfg->work_q);
}
}
Expand Down Expand Up @@ -1746,6 +1766,7 @@ static int init_afu(struct cxlflash_cfg *cfg)
rc = -ENOMEM;
goto err1;
}
kref_init(&afu->mapcount);

/* No byte reverse on reading afu_version or string will be backwards */
reg = readq(&afu->afu_map->global.regs.afu_version);
Expand Down Expand Up @@ -1780,8 +1801,7 @@ static int init_afu(struct cxlflash_cfg *cfg)
return rc;

err2:
cxl_psa_unmap((void __iomem *)afu->afu_map);
afu->afu_map = NULL;
kref_put(&afu->mapcount, afu_unmap);
err1:
term_mc(cfg, UNDO_START);
goto out;
Expand Down Expand Up @@ -2354,6 +2374,7 @@ static void cxlflash_worker_thread(struct work_struct *work)

if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0)
scsi_scan_host(cfg->host);
kref_put(&afu->mapcount, afu_unmap);
}

/**
Expand Down

0 comments on commit b45cdba

Please sign in to comment.