Skip to content

Commit

Permalink
drm/amdkfd: Use xcc mask for identifying xcc
Browse files Browse the repository at this point in the history
Instead of the starting xcc id and the number of xccs per node, use the xcc mask,
which is the mask of logical ids of the xccs belonging to a partition.

Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Lijo Lazar authored and Alex Deucher committed Jun 9, 2023
1 parent a75f227 commit c4050ff
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 95 deletions.
9 changes: 4 additions & 5 deletions drivers/gpu/drm/amd/amdkfd/kfd_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,15 +745,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
node->vm_info.vmid_num_kfd = vmid_num_kfd;
node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
/* TODO : Check if error handling is needed */
if (node->xcp)
if (node->xcp) {
amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX,
&node->xcc_mask);
else
++xcp_idx;
} else {
node->xcc_mask =
(1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;

node->num_xcc_per_node = max(1U, kfd->adev->gfx.num_xcc_per_xcp);
node->start_xcc_id = node->num_xcc_per_node * i;
}

if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) &&
partition_mode == AMDGPU_CPX_PARTITION_MODE &&
Expand Down
86 changes: 44 additions & 42 deletions drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,16 +136,14 @@ static void init_sdma_bitmaps(struct device_queue_manager *dqm)
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
int xcc = 0;
uint32_t xcc_mask = dqm->dev->xcc_mask;
int xcc_id;

for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_sh_mem_settings(
dqm->dev->adev, qpd->vmid,
qpd->sh_mem_config,
qpd->sh_mem_ape1_base,
qpd->sh_mem_ape1_limit,
qpd->sh_mem_bases,
dqm->dev->start_xcc_id + xcc);
dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
qpd->sh_mem_bases, xcc_id);
}

static void kfd_hws_hang(struct device_queue_manager *dqm)
Expand Down Expand Up @@ -427,14 +425,14 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
static void program_trap_handler_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
int xcc = 0;
uint32_t xcc_mask = dqm->dev->xcc_mask;
int xcc_id;

if (dqm->dev->kfd2kgd->program_trap_handler_settings)
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->program_trap_handler_settings(
dqm->dev->adev, qpd->vmid,
qpd->tba_addr, qpd->tma_addr,
dqm->dev->start_xcc_id + xcc);
dqm->dev->adev, qpd->vmid, qpd->tba_addr,
qpd->tma_addr, xcc_id);
}

static int allocate_vmid(struct device_queue_manager *dqm,
Expand Down Expand Up @@ -697,7 +695,8 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
struct kfd_process_device *pdd;
int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
int xcc = 0;
uint32_t xcc_mask = dev->xcc_mask;
int xcc_id;

reg_sq_cmd.u32All = 0;
reg_gfx_index.u32All = 0;
Expand Down Expand Up @@ -742,11 +741,10 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
reg_sq_cmd.bits.vm_id = vmid;

for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
dev->kfd2kgd->wave_control_execute(dev->adev,
reg_gfx_index.u32All,
reg_sq_cmd.u32All,
dev->start_xcc_id + xcc);
for_each_inst(xcc_id, xcc_mask)
dev->kfd2kgd->wave_control_execute(
dev->adev, reg_gfx_index.u32All,
reg_sq_cmd.u32All, xcc_id);

return 0;
}
Expand Down Expand Up @@ -1258,12 +1256,12 @@ static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
int xcc = 0, ret;
uint32_t xcc_mask = dqm->dev->xcc_mask;
int xcc_id, ret;

for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
for_each_inst(xcc_id, xcc_mask) {
ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
dqm->dev->adev, pasid, vmid,
dqm->dev->start_xcc_id + xcc);
dqm->dev->adev, pasid, vmid, xcc_id);
if (ret)
break;
}
Expand All @@ -1273,15 +1271,14 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,

static void init_interrupts(struct device_queue_manager *dqm)
{
unsigned int i, xcc;
uint32_t xcc_mask = dqm->dev->xcc_mask;
unsigned int i, xcc_id;

for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
if (is_pipe_enabled(dqm, 0, i)) {
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
for_each_inst(xcc_id, xcc_mask)
dqm->dev->kfd2kgd->init_interrupts(
dqm->dev->adev, i,
dqm->dev->start_xcc_id +
xcc);
dqm->dev->adev, i, xcc_id);
}
}
}
Expand Down Expand Up @@ -2283,7 +2280,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
get_num_all_sdma_engines(dqm) *
dev->kfd->device_info.num_sdma_queues_per_engine +
(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
dqm->dev->num_xcc_per_node);
NUM_XCC(dqm->dev->xcc_mask));

retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
Expand Down Expand Up @@ -2489,27 +2486,29 @@ static void seq_reg_dump(struct seq_file *m,
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
struct device_queue_manager *dqm = data;
uint32_t xcc_mask = dqm->dev->xcc_mask;
uint32_t (*dump)[2], n_regs;
int pipe, queue;
int r = 0, xcc;
uint32_t inst;
int r = 0, xcc_id;
uint32_t sdma_engine_start;

if (!dqm->sched_running) {
seq_puts(m, " Device is stopped\n");
return 0;
}

for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
inst = dqm->dev->start_xcc_id + xcc;
for_each_inst(xcc_id, xcc_mask) {
r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
&dump, &n_regs, inst);
KFD_CIK_HIQ_PIPE,
KFD_CIK_HIQ_QUEUE, &dump,
&n_regs, xcc_id);
if (!r) {
seq_printf(m,
seq_printf(
m,
" Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
inst, KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
xcc_id,
KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1,
KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm),
KFD_CIK_HIQ_QUEUE);
seq_reg_dump(m, dump, n_regs);

Expand All @@ -2524,13 +2523,16 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
dqm->dev->kfd->shared_resources.cp_queue_bitmap))
continue;

r = dqm->dev->kfd2kgd->hqd_dump(
dqm->dev->adev, pipe, queue, &dump, &n_regs, inst);
r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
pipe, queue,
&dump, &n_regs,
xcc_id);
if (r)
break;

seq_printf(m, " Inst %d, CP Pipe %d, Queue %d\n",
inst, pipe, queue);
seq_printf(m,
" Inst %d, CP Pipe %d, Queue %d\n",
xcc_id, pipe, queue);
seq_reg_dump(m, dump, n_regs);

kfree(dump);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;

offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
dev->num_xcc_per_node;
NUM_XCC(dev->xcc_mask);

mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
+ offset);
Expand Down
71 changes: 37 additions & 34 deletions drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_node *node,
retval = amdgpu_amdkfd_alloc_gtt_mem(node->adev,
(ALIGN(q->ctl_stack_size, PAGE_SIZE) +
ALIGN(sizeof(struct v9_mqd), PAGE_SIZE)) *
node->num_xcc_per_node,
NUM_XCC(node->xcc_mask),
&(mqd_mem_obj->gtt_mem),
&(mqd_mem_obj->gpu_addr),
(void *)&(mqd_mem_obj->cpu_ptr), true);
Expand Down Expand Up @@ -482,7 +482,7 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,

memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));

for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
kfd_get_hiq_xcc_mqd(mm->dev, &xcc_mqd_mem_obj, xcc);

init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
Expand All @@ -506,21 +506,21 @@ static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
struct queue_properties *p, struct mm_struct *mms)
{
int xcc, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);

for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + hiq_mqd_size * xcc;
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + hiq_mqd_size * inst;
err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
pipe_id, queue_id,
p->doorbell_off,
start_inst+xcc);
p->doorbell_off, xcc_id);
if (err) {
pr_debug("Failed to load HIQ MQD for XCC: %d\n", xcc);
pr_debug("Failed to load HIQ MQD for XCC: %d\n", inst);
break;
}
++inst;
}

return err;
Expand All @@ -530,20 +530,21 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
{
int xcc = 0, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);

for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + hiq_mqd_size * xcc;
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + hiq_mqd_size * inst;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
queue_id, start_inst+xcc);
queue_id, xcc_id);
if (err) {
pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
pr_debug("Destroy MQD failed for xcc: %d\n", inst);
break;
}
++inst;
}

return err;
Expand Down Expand Up @@ -573,7 +574,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
uint32_t local_xcc_start = mm->dev->dqm->current_logical_xcc_start++;

memset(&xcc_mqd_mem_obj, 0x0, sizeof(struct kfd_mem_obj));
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
get_xcc_mqd(mqd_mem_obj, &xcc_mqd_mem_obj, offset*xcc);

init_mqd(mm, (void **)&m, &xcc_mqd_mem_obj, &xcc_gart_addr, q);
Expand All @@ -600,7 +601,7 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
m->compute_tg_chunk_size = 1;
m->compute_current_logic_xcc_id =
(local_xcc_start + xcc) %
mm->dev->num_xcc_per_node;
NUM_XCC(mm->dev->xcc_mask);

switch (xcc) {
case 0:
Expand Down Expand Up @@ -633,7 +634,7 @@ static void update_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
int xcc = 0;
uint64_t size = mm->mqd_stride(mm, q);

for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
m = get_mqd(mqd + size * xcc);
update_mqd(mm, m, q, minfo);

Expand Down Expand Up @@ -661,24 +662,25 @@ static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type, unsigned int timeout,
uint32_t pipe_id, uint32_t queue_id)
{
int xcc = 0, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
struct v9_mqd *m;
uint64_t mqd_offset;
uint32_t start_inst = mm->dev->start_xcc_id;

m = get_mqd(mqd);
mqd_offset = m->cp_mqd_stride_size;

for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + mqd_offset * xcc;
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + mqd_offset * inst;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
queue_id, start_inst+xcc);
queue_id, xcc_id);
if (err) {
pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
pr_debug("Destroy MQD failed for xcc: %d\n", inst);
break;
}
++inst;
}

return err;
Expand All @@ -690,21 +692,22 @@ static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
{
/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
int xcc = 0, err;
uint32_t xcc_mask = mm->dev->xcc_mask;
int xcc_id, err, inst = 0;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t mqd_stride_size = mm->mqd_stride(mm, p);

for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + mqd_stride_size * xcc;
err = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, xcc_mqd,
pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms, start_inst+xcc);
for_each_inst(xcc_id, xcc_mask) {
xcc_mqd = mqd + mqd_stride_size * inst;
err = mm->dev->kfd2kgd->hqd_load(
mm->dev->adev, xcc_mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr, wptr_shift, 0, mms,
xcc_id);
if (err) {
pr_debug("Load MQD failed for xcc: %d\n", xcc);
pr_debug("Load MQD failed for xcc: %d\n", inst);
break;
}
++inst;
}

return err;
Expand All @@ -722,7 +725,7 @@ static int get_wave_state_v9_4_3(struct mqd_manager *mm, void *mqd,
uint64_t mqd_stride_size = mm->mqd_stride(mm, q);
u32 tmp_ctl_stack_used_size = 0, tmp_save_area_used_size = 0;

for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
for (xcc = 0; xcc < NUM_XCC(mm->dev->xcc_mask); xcc++) {
xcc_mqd = mqd + mqd_stride_size * xcc;
xcc_ctl_stack = (void __user *)((uintptr_t)ctl_stack +
q->ctx_save_restore_area_size * xcc);
Expand Down
4 changes: 0 additions & 4 deletions drivers/gpu/drm/amd/amdkfd/kfd_priv.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,10 +274,6 @@ struct kfd_node {
*/
struct kfd_vmid_info vm_info;
unsigned int id; /* topology stub index */
unsigned int num_xcc_per_node;
unsigned int start_xcc_id; /* Starting XCC instance
* number for the node
*/
uint32_t xcc_mask; /* Instance mask of XCCs present */
struct amdgpu_xcp *xcp;

Expand Down
Loading

0 comments on commit c4050ff

Please sign in to comment.