Skip to content

Commit

Permalink
drm/amdkfd: Add sanity checks in IRQ handlers
Browse files Browse the repository at this point in the history
Only accept interrupts from KFD VMIDs. Just checking for a PASID may
not be enough because amdgpu started using PASIDs to map VM faults
to processes.

Warn if an IRQ doesn't have a valid PASID (indicating a firmware bug).

Suggested-by: Shaoyun Liu <Shaoyun.Liu@amd.com>
Suggested-by: Oak Zeng <Oak.Zeng@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
  • Loading branch information
Felix Kuehling authored and Oded Gabbay committed May 1, 2018
1 parent 2533f07 commit c129db1
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 21 deletions.
20 changes: 15 additions & 5 deletions drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,28 @@
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
unsigned int vmid, pasid;

/* Only handle interrupts from KFD VMIDs */
vmid = (ihre->ring_id & 0x0000ff00) >> 8;
if (vmid < dev->vm_info.first_vmid_kfd ||
vmid > dev->vm_info.last_vmid_kfd)
return 0;

/* If there is no valid PASID, it's likely a firmware bug */
pasid = (ihre->ring_id & 0xffff0000) >> 16;
if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
return 0;

/* Do not process in ISR, just request it to be forwarded to WQ. */
return (pasid != 0) &&
(ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
*/
return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
}

static void cik_event_interrupt_wq(struct kfd_dev *dev,
Expand Down
40 changes: 24 additions & 16 deletions drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,35 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
uint16_t source_id, client_id, pasid, vmid;
const uint32_t *data = ih_ring_entry;

source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
/* Only handle interrupts from KFD VMIDs */
vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
if (vmid < dev->vm_info.first_vmid_kfd ||
vmid > dev->vm_info.last_vmid_kfd)
return 0;

/* If there is no valid PASID, it's likely a firmware bug */
pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
return 0;

if (pasid) {
const uint32_t *data = ih_ring_entry;
source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);

pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
client_id, source_id, pasid);
pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
data[0], data[1], data[2], data[3],
data[4], data[5], data[6], data[7]);
}
pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
client_id, source_id, pasid);
pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
data[0], data[1], data[2], data[3],
data[4], data[5], data[6], data[7]);

return (pasid != 0) &&
(source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
source_id == SOC15_INTSRC_SDMA_TRAP ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE);
/* Interrupt types we care about: various signals and faults.
* They will be forwarded to a work queue (see below).
*/
return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
source_id == SOC15_INTSRC_SDMA_TRAP ||
source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
source_id == SOC15_INTSRC_CP_BAD_OPCODE;
}

static void event_interrupt_wq_v9(struct kfd_dev *dev,
Expand Down

0 comments on commit c129db1

Please sign in to comment.