Skip to content

Commit

Permalink
drm/amdkfd: Add XCC instance to kgd2kfd interface (v3)
Browse files Browse the repository at this point in the history
Gfx 9 starts to have multiple XCC instances in one device. Add instance
parameter to kgd2kfd functions where XCC instance was hard coded as 0.
Also, update code to pass the correct instance number when running
on a multi-XCC setup.

v2: introduce the XCC instance to gfx v11 (Morris)
v3: rebase (Alex)

Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Mukul Joshi authored and Alex Deucher committed Jun 9, 2023
1 parent 3c8bdb5 commit e2069a7
Show file tree
Hide file tree
Showing 17 changed files with 270 additions and 218 deletions.
38 changes: 19 additions & 19 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "soc15.h"

static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
u32 pasid, unsigned int vmid)
u32 pasid, unsigned int vmid, uint32_t inst)
{
unsigned long timeout;

Expand All @@ -47,11 +47,11 @@ static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
ATC_VMID0_PASID_MAPPING__VALID_MASK;

WREG32(SOC15_REG_OFFSET(ATHUB, 0,
WREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);

timeout = jiffies + msecs_to_jiffies(10);
while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
while (!(RREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
(1U << vmid))) {
if (time_after(jiffies, timeout)) {
Expand All @@ -61,13 +61,13 @@ static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
cpu_relax();
}

WREG32(SOC15_REG_OFFSET(ATHUB, 0,
WREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
1U << vmid);

WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
WREG32(SOC15_REG_OFFSET(OSSSYS, inst, mmIH_VMID_0_LUT) + vmid,
pasid_mapping);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
WREG32(SOC15_REG_OFFSET(OSSSYS, inst, mmIH_VMID_0_LUT_MM) + vmid,
pasid_mapping);

return 0;
Expand All @@ -81,20 +81,20 @@ static inline struct v9_mqd *get_mqd(void *mqd)
static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v9_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, hqd_end, data;

m = get_mqd(mqd);

kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id);
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);

/* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
hqd_end = SOC15_REG_OFFSET(GC, 0, regCP_HQD_AQL_DISPATCH_ID_HI);
hqd_base = SOC15_REG_OFFSET(GC, inst, regCP_MQD_BASE_ADDR);
hqd_end = SOC15_REG_OFFSET(GC, inst, regCP_HQD_AQL_DISPATCH_ID_HI);

for (reg = hqd_base; reg <= hqd_end; reg++)
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
Expand All @@ -103,7 +103,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_DOORBELL_CONTROL),
data);

if (wptr) {
Expand Down Expand Up @@ -133,29 +133,29 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;

WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0,
WREG32_RLC(SOC15_REG_OFFSET(GC, inst,
regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
WREG32(SOC15_REG_OFFSET(GC, inst, regCP_PQ_WPTR_POLL_CNTL1),
(uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
queue_id));
}

/* Start the EOP fetcher */
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));

data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_ACTIVE), data);

kgd_gfx_v9_release_queue(adev);
kgd_gfx_v9_release_queue(adev, inst);

return 0;
}
Expand Down
22 changes: 12 additions & 10 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);

Expand All @@ -91,7 +91,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}

static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
Expand Down Expand Up @@ -135,7 +135,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
* but still works
*/

static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
Expand Down Expand Up @@ -205,7 +206,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
Expand Down Expand Up @@ -286,7 +287,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,

static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
Expand Down Expand Up @@ -338,7 +339,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,

static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
Expand Down Expand Up @@ -469,7 +470,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,

static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
Expand Down Expand Up @@ -510,7 +511,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
Expand Down Expand Up @@ -673,7 +674,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,

static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;

Expand Down Expand Up @@ -709,7 +710,8 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev,
}

static void program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);

Expand Down
27 changes: 15 additions & 12 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);

Expand All @@ -93,7 +93,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v

/* ATC is defeatured on Sienna_Cichlid */
static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;

Expand All @@ -105,7 +105,8 @@ static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int
return 0;
}

static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id)
static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
Expand Down Expand Up @@ -177,7 +178,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
Expand Down Expand Up @@ -273,7 +274,7 @@ static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,

static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
Expand Down Expand Up @@ -325,7 +326,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,

static int hqd_dump_v10_3(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
Expand Down Expand Up @@ -456,7 +457,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,

static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
Expand Down Expand Up @@ -498,7 +499,7 @@ static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
Expand Down Expand Up @@ -586,7 +587,7 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,

static int wave_control_execute_v10_3(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;

Expand Down Expand Up @@ -628,7 +629,8 @@ static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
}

static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);

Expand Down Expand Up @@ -765,7 +767,7 @@ uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev,
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
*/
void get_iq_wait_times_v10_3(struct amdgpu_device *adev,
uint32_t *wait_times)
uint32_t *wait_times, uint32_t inst)

{
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
Expand All @@ -775,7 +777,8 @@ void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data)
uint32_t *reg_data,
uint32_t inst)
{
*reg_data = wait_times;

Expand Down
19 changes: 10 additions & 9 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);

Expand All @@ -89,7 +89,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
}

static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;

Expand All @@ -101,7 +101,8 @@ static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int p
return 0;
}

static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
Expand Down Expand Up @@ -162,7 +163,7 @@ static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm)
struct mm_struct *mm, uint32_t inst)
{
struct v11_compute_mqd *m;
uint32_t *mqd_hqd;
Expand Down Expand Up @@ -258,7 +259,7 @@ static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,

static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v11_compute_mqd *m;
Expand Down Expand Up @@ -310,7 +311,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,

static int hqd_dump_v11(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
Expand Down Expand Up @@ -445,7 +446,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
}

static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id)
uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
Expand Down Expand Up @@ -486,7 +487,7 @@ static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
Expand Down Expand Up @@ -571,7 +572,7 @@ static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,

static int wave_control_execute_v11(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;

Expand Down
Loading

0 comments on commit e2069a7

Please sign in to comment.