Skip to content

Commit

Permalink
Merge tag 'amd-drm-fixes-6.8-2024-02-01' of https://gitlab.freedeskto…
Browse files Browse the repository at this point in the history
…p.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.8-2024-02-01:

amdgpu:
- Fix reboot issue seen on some 7000 series dGPUs
- Fix client init order for KFD
- Misc display fixes
- USB-C fix
- DCN 3.5 fixes
- Fix issues with GPU scheduler and GPU reset
- GPU firmware loading fix
- Misc fixes
- GC 11.5 fix
- VCN 4.0.5 fix
- IH overflow fix

amdkfd:
- SVM fixes
- Trap handler fix
- Fix device permission lookup
- Properly reserve BO before validating it

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240201184108.4923-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Feb 2, 2024
2 parents 111a3f0 + 6813cdc commit a639525
Show file tree
Hide file tree
Showing 48 changed files with 217 additions and 225 deletions.
32 changes: 21 additions & 11 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
/*
 * DRM client callback table used for the "kfd" client.
 * Only the unregister hook is populated; it points at drm_client_release.
 */
static const struct drm_client_funcs kfd_client_funcs = {
.unregister = drm_client_release,
};

/*
 * amdgpu_amdkfd_drm_client_create - set up the "kfd" DRM client for a device
 * @adev: amdgpu device whose kfd.client is initialised and registered
 *
 * Does nothing when adev->kfd.init_complete is not set. Otherwise the client
 * is initialised with kfd_client_funcs and then registered.
 *
 * Return: 0 when there was nothing to do or the client was registered,
 * otherwise the error code reported by drm_client_init() (also logged).
 */
int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
{
	int err;

	/* KFD initialisation did not complete: no client to create. */
	if (!adev->kfd.init_complete)
		return 0;

	err = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
			      &kfd_client_funcs);
	if (!err) {
		/* Registration only happens after a successful init. */
		drm_client_register(&adev->kfd.client);
		return 0;
	}

	dev_err(adev->dev, "Failed to init DRM client: %d\n", err);
	return err;
}

void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
int last_valid_bit;
int ret;

amdgpu_amdkfd_gpuvm_init_mem_limits();

Expand All @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.enable_mes = adev->enable_mes,
};

ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", &kfd_client_funcs);
if (ret) {
dev_err(adev->dev, "Failed to init DRM client: %d\n", ret);
return;
}

/* this is going to have a few of the MSBs set that we need to
* clear
*/
Expand Down Expand Up @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)

adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
&gpu_resources);
if (adev->kfd.init_complete)
drm_client_register(&adev->kfd.client);
else
drm_client_release(&adev->kfd.client);

amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;

Expand Down
4 changes: 3 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
struct mm_struct *mm,
struct svm_range_bo *svm_bo);

int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
#if defined(CONFIG_DEBUG_FS)
int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
#endif
Expand Down Expand Up @@ -301,7 +303,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
if (!amdgpu_ring_sched_ready(ring))
continue;

/* stop scheduler and drain ring. */
Expand Down
20 changes: 17 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2085,21 +2085,35 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
return ret;
}

void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv)
{
struct kfd_mem_attachment *entry;
struct amdgpu_vm *vm;
int ret;

vm = drm_priv_to_vm(drm_priv);

mutex_lock(&mem->lock);

ret = amdgpu_bo_reserve(mem->bo, true);
if (ret)
goto out;

list_for_each_entry(entry, &mem->attachments, list) {
if (entry->bo_va->base.vm == vm)
kfd_mem_dmaunmap_attachment(mem, entry);
if (entry->bo_va->base.vm != vm)
continue;
if (entry->bo_va->base.bo->tbo.ttm &&
!entry->bo_va->base.bo->tbo.ttm->sg)
continue;

kfd_mem_dmaunmap_attachment(mem, entry);
}

amdgpu_bo_unreserve(mem->bo);
out:
mutex_unlock(&mem->lock);

return ret;
}

int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
Expand Down
8 changes: 4 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_wqueue_stop(&ring->sched);
}
Expand All @@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;
drm_sched_wqueue_start(&ring->sched);
}
Expand Down Expand Up @@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)

ring = adev->rings[val];

if (!ring || !ring->funcs->preempt_ib ||
!drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring) ||
!ring->funcs->preempt_ib)
return -EINVAL;

/* the last preemption failed */
Expand Down
36 changes: 13 additions & 23 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -4121,23 +4121,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
}
} else {
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
case IP_VERSION(13, 0, 10):
r = psp_gpu_reset(adev);
break;
default:
tmp = amdgpu_reset_method;
/* It should do a default reset when loading or reloading the driver,
* regardless of the module parameter reset_method.
*/
amdgpu_reset_method = AMD_RESET_METHOD_NONE;
r = amdgpu_asic_reset(adev);
amdgpu_reset_method = tmp;
break;
}

tmp = amdgpu_reset_method;
/* It should do a default reset when loading or reloading the driver,
* regardless of the module parameter reset_method.
*/
amdgpu_reset_method = AMD_RESET_METHOD_NONE;
r = amdgpu_asic_reset(adev);
amdgpu_reset_method = tmp;
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
Expand Down Expand Up @@ -5031,7 +5021,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;

spin_lock(&ring->sched.job_list_lock);
Expand Down Expand Up @@ -5170,7 +5160,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;

/* Clear job fence from fence drv to avoid force_completion
Expand Down Expand Up @@ -5637,7 +5627,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;

drm_sched_stop(&ring->sched, job ? &job->base : NULL);
Expand Down Expand Up @@ -5706,7 +5696,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;

drm_sched_start(&ring->sched, true);
Expand Down Expand Up @@ -6061,7 +6051,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;

drm_sched_stop(&ring->sched, NULL);
Expand Down Expand Up @@ -6189,7 +6179,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];

if (!ring || !drm_sched_wqueue_ready(&ring->sched))
if (!amdgpu_ring_sched_ready(ring))
continue;

drm_sched_start(&ring->sched, true);
Expand Down
4 changes: 4 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
if (ret)
goto err_pci;

ret = amdgpu_amdkfd_drm_client_create(adev);
if (ret)
goto err_pci;

/*
* 1. don't init fbdev on hw without DCE
* 2. don't init fbdev if there are no connectors
Expand Down
12 changes: 12 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
ring->name);

ring->sched.ready = !r;

return r;
}

Expand Down Expand Up @@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
if (ring->is_sw_ring)
amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
}

/*
 * amdgpu_ring_sched_ready - check whether a ring's scheduler can be used
 * @ring: ring to inspect; may be NULL
 *
 * Return: true only when @ring is non-NULL, its no_scheduler flag is clear
 * and drm_sched_wqueue_ready() reports its scheduler as ready; false
 * otherwise.
 */
bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
{
	/* Checks short-circuit in order, so a NULL ring is never dereferenced. */
	return ring && !ring->no_scheduler &&
	       drm_sched_wqueue_ready(&ring->sched);
}
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);

bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
#endif
6 changes: 6 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/cik_ih.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
tmp = RREG32(mmIH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);

/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
* can be detected.
*/
tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
}
return (wptr & ih->ptr_mask);
}
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/cz_ih.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);

/* Unset the CLEAR_OVERFLOW bit immediately so new overflows
* can be detected.
*/
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0);
WREG32(mmIH_RB_CNTL, tmp);

out:
return (wptr & ih->ptr_mask);
Expand Down
2 changes: 0 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -4027,8 +4027,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
err = 0;
adev->gfx.mec2_fw = NULL;
}
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);

gfx_v10_0_check_fw_write_wait(adev);
out:
Expand Down
22 changes: 0 additions & 22 deletions drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,23 +107,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a)
};

static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188),
SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x80009007),
SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007),
SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000),
SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000),
SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a),
SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f)
};

#define DEFAULT_SH_MEM_CONFIG \
((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
(SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
Expand Down Expand Up @@ -304,11 +287,6 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_11_0_1,
(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
break;
case IP_VERSION(11, 5, 0):
soc15_program_register_sequence(adev,
golden_settings_gc_11_5_0,
(const u32)ARRAY_SIZE(golden_settings_gc_11_5_0));
break;
default:
break;
}
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -915,8 +915,8 @@ static int gmc_v6_0_hw_init(void *handle)

if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
else
return r;

return 0;
}

static int gmc_v6_0_hw_fini(void *handle)
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -1099,8 +1099,8 @@ static int gmc_v7_0_hw_init(void *handle)

if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
else
return r;

return 0;
}

static int gmc_v7_0_hw_fini(void *handle)
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -1219,8 +1219,8 @@ static int gmc_v8_0_hw_init(void *handle)

if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
else
return r;

return 0;
}

static int gmc_v8_0_hw_fini(void *handle)
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -2341,8 +2341,8 @@ static int gmc_v9_0_hw_init(void *handle)

if (amdgpu_emu_mode == 1)
return amdgpu_gmc_vram_checking(adev);
else
return r;

return 0;
}

/**
Expand Down
Loading

0 comments on commit a639525

Please sign in to comment.