From ada637e70f96862ff5ba20a169506b58cf567db9 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 14 May 2019 09:05:37 -0400 Subject: [PATCH 1/9] drm/amd/display: Add ASICREV_IS_PICASSO [WHY] We only want to load DMCU FW on Picasso and Raven 2, not on Raven 1. Signed-off-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 34d6fdcb32e2f..4c8ce7938f010 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -138,13 +138,14 @@ #endif #define RAVEN_UNKNOWN 0xFF -#if defined(CONFIG_DRM_AMD_DC_DCN1_01) -#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) -#endif /* DCN1_01 */ #define ASIC_REV_IS_RAVEN(eChipRev) ((eChipRev >= RAVEN_A0) && eChipRev < RAVEN_UNKNOWN) #define RAVEN1_F0 0xF0 #define ASICREV_IS_RV1_F0(eChipRev) ((eChipRev >= RAVEN1_F0) && (eChipRev < RAVEN_UNKNOWN)) +#if defined(CONFIG_DRM_AMD_DC_DCN1_01) +#define ASICREV_IS_PICASSO(eChipRev) ((eChipRev >= PICASSO_A0) && (eChipRev < RAVEN2_A0)) +#define ASICREV_IS_RAVEN2(eChipRev) ((eChipRev >= RAVEN2_A0) && (eChipRev < 0xF0)) +#endif /* DCN1_01 */ #define FAMILY_RV 142 /* DCN 1*/ From 55143dc23ca4792868ea8c17bce65ca7b3d3e8c4 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Mon, 29 Apr 2019 09:39:15 -0400 Subject: [PATCH 2/9] drm/amd/display: Don't load DMCU for Raven 1 [WHY] Some early Raven boards had a bad SBIOS that doesn't play nicely with the DMCU FW. We thought the issues were fixed by ignoring errors on DMCU load but that doesn't seem to be the case. We've still seen reports of users unable to boot their systems at all. [HOW] Disable DMCU load on Raven 1. Only load it for Raven 2 and Picasso. Signed-off-by: Harry Wentland Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 995f9df66142e..0680c740f6fe2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -29,6 +29,7 @@ #include "dm_services_types.h" #include "dc.h" #include "dc/inc/core_types.h" +#include "dal_asic_id.h" #include "vid.h" #include "amdgpu.h" @@ -640,7 +641,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) static int load_dmcu_fw(struct amdgpu_device *adev) { - const char *fw_name_dmcu; + const char *fw_name_dmcu = NULL; int r; const struct dmcu_firmware_header_v1_0 *hdr; @@ -663,7 +664,14 @@ static int load_dmcu_fw(struct amdgpu_device *adev) case CHIP_VEGA20: return 0; case CHIP_RAVEN: - fw_name_dmcu = FIRMWARE_RAVEN_DMCU; + if (ASICREV_IS_PICASSO(adev->external_rev_id)) + fw_name_dmcu = FIRMWARE_RAVEN_DMCU; +#if defined(CONFIG_DRM_AMD_DC_DCN1_01) + else if (ASICREV_IS_RAVEN2(adev->external_rev_id)) + fw_name_dmcu = FIRMWARE_RAVEN_DMCU; +#endif + else + return 0; break; default: DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); From 0a5a9c276c335870a1cecc4f02b76d6d6f663c8b Mon Sep 17 00:00:00 2001 From: Kent Russell Date: Mon, 13 May 2019 09:00:05 -0400 Subject: [PATCH 3/9] drm/amdkfd: Add missing Polaris10 ID This was added to amdgpu but was missed in amdkfd Signed-off-by: Kent Russell Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.rg --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c1e4d44d6137a..4dd8489144d06 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -355,6 +355,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67CF, &polaris10_device_info }, /* Polaris10 */ { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ { 0x67DF, &polaris10_device_info }, /* Polaris10 */ + { 0x6FDF, &polaris10_device_info }, /* Polaris10 */ { 0x67E0, &polaris11_device_info }, /* Polaris11 */ { 0x67E1, &polaris11_device_info }, /* Polaris11 */ { 0x67E3, &polaris11_device_info }, /* Polaris11 */ From 379109351f4f6f2405cf54e7a296055f589c3ad1 Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Fri, 17 May 2019 11:33:56 +0800 Subject: [PATCH 4/9] drm/amdgpu: keep stolen memory on picasso otherwise screen corrupts during modprobe. Signed-off-by: Flora Cui Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3fd79e07944db..4e6fcaeb13037 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -626,6 +626,7 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) case CHIP_VEGA10: return true; case CHIP_RAVEN: + return (adev->pdev->device == 0x15d8); case CHIP_VEGA12: case CHIP_VEGA20: default: From 5887a59961e2295c5b02f39dbc0ecf9212709b7b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 17 May 2019 09:21:13 -0500 Subject: [PATCH 5/9] drm/amdgpu/soc15: skip reset on init Not necessary on soc15 and breaks driver reload on server cards. Acked-by: Amber Lin Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/soc15.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 4900e4958decb..b7e594c2bfb43 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -730,6 +730,11 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; + /* Just return false for soc15 GPUs. Reset does not seem to + * be necessary. + */ + return false; + if (adev->flags & AMD_IS_APU) return false; From 067e75b3d786b6bd7863e86f9a8025284be87ac8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 17 May 2019 09:31:43 -0500 Subject: [PATCH 6/9] drm/amdgpu/gmc9: set vram_width properly for SR-IOV For SR-IOV, vram_width can't be read from ATOM as RAVEN, and DF related registers is not readable, so hardcord is the only way to set the correct vram_width. Reviewed-by: Yintian Tao Signed-off-by: Trigger Huang Signed-off-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4e6fcaeb13037..3b7370d914a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -813,8 +813,16 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) int chansize, numchan; int r; - if (amdgpu_emu_mode != 1) + if (amdgpu_sriov_vf(adev)) { + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + } else if (amdgpu_emu_mode != 1) { adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev); + } + if (!adev->gmc.vram_width) { /* hbm memory channel size */ if (adev->flags & AMD_IS_APU) From 029f41535ac5e07e1fa17ca963a9f9fd05fb665f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 15 May 2019 12:51:30 +0300 Subject: [PATCH 7/9] drm/amd/powerplay: fix locking in smu_feature_set_supported() There is a typo so the code unlocks twice instead of taking the lock and then releasing it. Fixes: f14a323db5b0 ("drm/amd/powerplay: implement update enabled feature state to smc for smu11") Signed-off-by: Dan Carpenter Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index c058c784180ee..eec329ab60370 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -280,7 +280,7 @@ int smu_feature_set_supported(struct smu_context *smu, int feature_id, WARN_ON(feature_id > feature->feature_num); - mutex_unlock(&feature->mutex); + mutex_lock(&feature->mutex); if (enable) test_and_set_bit(feature_id, feature->supported); else From 057f91645cef412fe460b17fa50726c8a1c5921c Mon Sep 17 00:00:00 2001 From: Yintian Tao Date: Thu, 16 May 2019 13:07:26 +0800 Subject: [PATCH 8/9] drm/amdgpu: skip fw pri bo alloc for SRIOV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PSP fw primary buffer is not used under SRIOV. Under SRIOV, VBIOS or hypervisor driver will load psp sos and psp sysdrv. Therefore, we don't need to allocate memory for it. v2: remove superfluous check for amdgpu_bo_free_kernel(). Signed-off-by: Yintian Tao Signed-off-by: Monk Liu Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 05897b05766b1..86cc24b2e0aa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -877,13 +877,16 @@ static int psp_load_fw(struct amdgpu_device *adev) if (!psp->cmd) return -ENOMEM; - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); - if (ret) - goto failed; + /* this fw pri bo is not used under SRIOV */ + if (!amdgpu_sriov_vf(psp->adev)) { + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + goto failed; + } ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, From 43d8107f0bdcaa4300e40231cc45ecbd1f77f73f Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Fri, 25 Jan 2019 16:35:35 -0500 Subject: [PATCH 9/9] drm/amdkfd: Fix compute profile switching Fix compute profile switching on process termination. Add a dedicated reference counter to keep track of entry/exit to/from compute profile. This enables switching compute profiles for other reasons than process creation or termination. Signed-off-by: Harish Kasiviswanathan Signed-off-by: Eric Huang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 16 ++++++++++++++++ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 11 ++++++----- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 +++++++ 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 4dd8489144d06..765b58a17dc79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -463,6 +463,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; + atomic_set(&kfd->compute_profile, 0); mutex_init(&kfd->doorbell_mutex); memset(&kfd->doorbell_available_index, 0, @@ -1037,6 +1038,21 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) atomic_inc(&kfd->sram_ecc_flag); } +void kfd_inc_compute_active(struct kfd_dev *kfd) +{ + if (atomic_inc_return(&kfd->compute_profile) == 1) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); +} + +void kfd_dec_compute_active(struct kfd_dev *kfd) +{ + int count = atomic_dec_return(&kfd->compute_profile); + + if (count == 0) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); + WARN_ONCE(count < 0, "Compute profile ref. count error"); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c6c9530e704e0..ae381450601c5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -811,8 +811,8 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); - if (dqm->processes_count++ == 0) - amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false); + dqm->processes_count++; + kfd_inc_compute_active(dqm->dev); dqm_unlock(dqm); @@ -835,9 +835,8 @@ static int unregister_process(struct device_queue_manager *dqm, if (qpd == cur->qpd) { list_del(&cur->list); kfree(cur); - if (--dqm->processes_count == 0) - amdgpu_amdkfd_set_compute_idle( - dqm->dev->kgd, true); + dqm->processes_count--; + kfd_dec_compute_active(dqm->dev); goto out; } } @@ -1539,6 +1538,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; + kfd_dec_compute_active(dqm->dev); break; } } @@ -1626,6 +1626,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; + kfd_dec_compute_active(dqm->dev); break; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9e02309656758..487d5da337c10 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -279,6 +279,9 @@ struct kfd_dev { /* SRAM ECC flag */ atomic_t sram_ecc_flag; + + /* Compute Profile ref. count */ + atomic_t compute_profile; }; enum kfd_mempool { @@ -978,6 +981,10 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); bool kfd_is_locked(void); +/* Compute profile */ +void kfd_inc_compute_active(struct kfd_dev *dev); +void kfd_dec_compute_active(struct kfd_dev *dev); + /* Debugfs */ #if defined(CONFIG_DEBUG_FS)