Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.14-2025-01-10' of https://gitlab.freedeskto…
Browse files Browse the repository at this point in the history
…p.org/agd5f/linux into drm-next

amd-drm-next-6.14-2025-01-10:

amdgpu:
- Fix max surface handling in DC
- clang fixes
- DCN 3.5 fixes
- DCN 4.0.1 fixes
- DC CRC fixes
- DML updates
- DSC fixes
- PSR fixes
- DC add some divide by 0 checks
- SMU13 updates
- SR-IOV fixes
- RAS fixes
- Cleaner shader support for gfx10.3 dGPUs
- fix drm buddy trim handling
- SDMA engine reset updates
_ Fix RB bitmap setup
- Fix doorbell ttm cleanup
- Add CEC notifier support
- DPIA updates
- MST fixes

amdkfd:
- Shader debugger fixes
- Trap handler cleanup
- Cleanup includes
- Eviction fence wq fix

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20250110172731.2960668-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Jan 13, 2025
2 parents 0dc8538 + 812a33a commit c3d590f
Show file tree
Hide file tree
Showing 167 changed files with 4,869 additions and 2,043 deletions.
6 changes: 6 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,9 @@ uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
uint32_t low, high;
uint64_t queue_addr = 0;

if (!amdgpu_gpu_recovery)
return 0;

kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);

Expand Down Expand Up @@ -1179,6 +1182,9 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev,
uint32_t low, high, pipe_reset_data = 0;
uint64_t queue_addr = 0;

if (!amdgpu_gpu_recovery)
return 0;

kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
amdgpu_gfx_rlc_enter_safe_mode(adev, inst);

Expand Down
6 changes: 2 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
{
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj);

if (aobj) {
amdgpu_hmm_unregister(aobj);
ttm_bo_put(&aobj->tbo);
}
amdgpu_hmm_unregister(aobj);
ttm_bo_put(&aobj->tbo);
}

int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
Expand Down
17 changes: 11 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3020,10 +3020,7 @@ static int psp_hw_init(struct amdgpu_ip_block *ip_block)
struct amdgpu_device *adev = ip_block->adev;

mutex_lock(&adev->firmware.mutex);
/*
* This sequence is just used on hw_init only once, no need on
* resume.
*/

ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;
Expand Down Expand Up @@ -3148,6 +3145,10 @@ static int psp_resume(struct amdgpu_ip_block *ip_block)

mutex_lock(&adev->firmware.mutex);

ret = amdgpu_ucode_init_bo(adev);
if (ret)
goto failed;

ret = psp_hw_start(psp);
if (ret)
goto failed;
Expand Down Expand Up @@ -3891,10 +3892,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev,
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct amdgpu_ip_block *ip_block;
uint32_t fw_ver;
int ret;

if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
if (!ip_block || !ip_block->status.late_initialized) {
dev_info(adev->dev, "PSP block is not ready yet\n.");
return -EBUSY;
}
Expand Down Expand Up @@ -3923,8 +3926,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev,
struct amdgpu_bo *fw_buf_bo = NULL;
uint64_t fw_pri_mc_addr;
void *fw_pri_cpu_addr;
struct amdgpu_ip_block *ip_block;

if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) {
ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP);
if (!ip_block || !ip_block->status.late_initialized) {
dev_err(adev->dev, "PSP block is not ready yet.");
return -EBUSY;
}
Expand Down
21 changes: 16 additions & 5 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -2832,8 +2832,10 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,

mutex_lock(&con->recovery_lock);
data = con->eh_data;
if (!data)
if (!data) {
/* Returning 0 as the absence of eh_data is acceptable */
goto free;
}

for (i = 0; i < pages; i++) {
if (from_rom &&
Expand All @@ -2845,26 +2847,34 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
* one row
*/
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
bps[i].retired_page <<
AMDGPU_GPU_PAGE_SHIFT)) {
ret = -EINVAL;
goto free;
else
} else {
find_pages_per_pa = true;
}
} else {
/* unsupported cases */
ret = -EOPNOTSUPP;
goto free;
}
}
} else {
if (amdgpu_umc_pages_in_a_row(adev, &err_data,
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT))
bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) {
ret = -EINVAL;
goto free;
}
}
} else {
if (from_rom && !find_pages_per_pa) {
if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) {
/* bad page in any NPS mode in eeprom */
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data))
if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) {
ret = -EINVAL;
goto free;
}
} else {
/* legacy bad page in eeprom, generated only in
* NPS1 mode
Expand All @@ -2881,6 +2891,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
/* non-nps1 mode, old RAS TA
* can't support it
*/
ret = -EOPNOTSUPP;
goto free;
}
}
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,13 +362,13 @@ static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val)
if (!adev)
return -ENODEV;

mask = (1 << adev->sdma.num_instances) - 1;
mask = BIT_ULL(adev->sdma.num_instances) - 1;
if ((val & mask) == 0)
return -EINVAL;

for (i = 0; i < adev->sdma.num_instances; ++i) {
ring = &adev->sdma.instance[i].ring;
if (val & (1 << i))
if (val & BIT_ULL(i))
ring->sched.ready = true;
else
ring->sched.ready = false;
Expand Down
1 change: 1 addition & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2066,6 +2066,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL);
ttm_device_fini(&adev->mman.bdev);
adev->mman.initialized = false;
DRM_INFO("amdgpu: ttm finalized\n");
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
remaining_size -= size;
}
mutex_unlock(&mgr->lock);

if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) {
struct drm_buddy_block *dcc_block;
Expand All @@ -584,6 +583,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
(u64)vres->base.size,
&vres->blocks);
}
mutex_unlock(&mgr->lock);

vres->base.start = 0;
size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks),
Expand Down
18 changes: 18 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "clearstate_gfx10.h"
#include "v10_structs.h"
#include "gfx_v10_0.h"
#include "gfx_v10_0_cleaner_shader.h"
#include "nbio_v2_3.h"

/*
Expand Down Expand Up @@ -4738,6 +4739,23 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block)
break;
}
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex;
adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex);
if (adev->gfx.me_fw_version >= 64 &&
adev->gfx.pfp_fw_version >= 100 &&
adev->gfx.mec_fw_version >= 122) {
adev->gfx.enable_cleaner_shader = true;
r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
if (r) {
adev->gfx.enable_cleaner_shader = false;
dev_err(adev->dev, "Failed to initialize cleaner shader\n");
}
}
break;
default:
adev->gfx.enable_cleaner_shader = false;
break;
Expand Down
56 changes: 56 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright 2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

/* Define the cleaner shader gfx_10_3_0 */
static const u32 gfx_10_3_0_cleaner_shader_hex[] = {
0xb0804004, 0xbf8a0000,
0xbe8203b8, 0xbefc0380,
0x7e008480, 0x7e028480,
0x7e048480, 0x7e068480,
0x7e088480, 0x7e0a8480,
0x7e0c8480, 0x7e0e8480,
0xbefc0302, 0x80828802,
0xbf84fff5, 0xbe8203ff,
0x80000000, 0x87020002,
0xbf840012, 0xbefe03c1,
0xbeff03c1, 0xd7650001,
0x0001007f, 0xd7660001,
0x0002027e, 0x16020288,
0xbe8203bf, 0xbefc03c1,
0xd9382000, 0x00020201,
0xd9386040, 0x00040401,
0xd70f6a01, 0x000202ff,
0x00000400, 0x80828102,
0xbf84fff7, 0xbefc03ff,
0x00000068, 0xbe803080,
0xbe813080, 0xbe823080,
0xbe833080, 0x80fc847c,
0xbf84fffa, 0xbeea0480,
0xbeec0480, 0xbeee0480,
0xbef00480, 0xbef20480,
0xbef40480, 0xbef60480,
0xbef80480, 0xbefa0480,
0xbf810000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
0xbf9f0000, 0xbf9f0000,
};
124 changes: 124 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright 2025 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader.
//To turn this shader program on for complitaion change this to main and lower shader main to main_1

// GFX10.3 : Clear SGPRs, VGPRs and LDS
// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot
// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD
// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS)
// It takes 2 workgroups to use all of LDS: one on each CU of the WGP
// Each wave clears SGPRs 0 - 107
// Each wave clears VGPRs 0 - 63
// The first wave of the workgroup clears its 64KB of LDS
// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup
// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared.


shader main
asic(GFX10)
type(CS)
wave_size(32)
// Note: original source code from SQ team

//
// Create 32 waves in a threadgroup (CS waves)
// Each allocates 64 VGPRs
// The workgroup allocates all of LDS (64kbytes)
//
// Takes about 2500 clocks to run.
// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks)
//
S_BARRIER
s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance)
s_mov_b32 m0, 0
//
// CLEAR VGPRs
//
label_0005:
v_movreld_b32 v0, 0
v_movreld_b32 v1, 0
v_movreld_b32 v2, 0
v_movreld_b32 v3, 0
v_movreld_b32 v4, 0
v_movreld_b32 v5, 0
v_movreld_b32 v6, 0
v_movreld_b32 v7, 0
s_mov_b32 m0, s2
s_sub_u32 s2, s2, 8
s_cbranch_scc0 label_0005
//
s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
// CLEAR LDS
//
s_mov_b32 exec_lo, 0xffffffff
s_mov_b32 exec_hi, 0xffffffff
v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63)
v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63)
v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte)
s_mov_b32 s2, 0x00000003f // 64 loop iterations
s_mov_b32 m0, 0xffffffff
// Clear all of LDS space
// Each FirstWave of WorkGroup clears 64kbyte block

label_001F:
ds_write2_b64 v1, v[2:3], v[2:3] offset1:32
ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96
v_add_co_u32 v1, vcc, 0x00000400, v1
s_sub_u32 s2, s2, 1
s_cbranch_scc0 label_001F

//
// CLEAR SGPRs
//
label_0023:
s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
label_sgpr_loop:
s_movreld_b32 s0, 0
s_movreld_b32 s1, 0
s_movreld_b32 s2, 0
s_movreld_b32 s3, 0
s_sub_u32 m0, m0, 4
s_cbranch_scc0 label_sgpr_loop

//clear vcc
s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR
s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR
s_mov_b64 vcc, 0 //clear vcc
s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1
s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3
s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5
s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7
s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9
s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11
s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13
s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15

s_endpgm

end


Loading

0 comments on commit c3d590f

Please sign in to comment.