Skip to content

Commit

Permalink
drm/amdgpu: add interface to get die id from memory address
Browse files Browse the repository at this point in the history
And implement it for UMC v12_0. The die id is calculated from the IPID
register in the bad page retirement flow, but we don't store it in the
EEPROM; it can also be derived from the physical address.

v2: get PA_C4 and PA_R13 from the MCA address since they may be cleared in
the retired page.

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Tao Zhou authored and Alex Deucher committed Dec 10, 2024
1 parent 2206daa commit fcb600b
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ struct amdgpu_umc_ras {
struct ta_ras_query_address_input *addr_in,
struct ta_ras_query_address_output *addr_out,
bool dump_addr);
uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev,
uint64_t mca_addr, uint64_t retired_page);
};

struct amdgpu_umc_funcs {
Expand Down
26 changes: 26 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
mutex_unlock(&con->umc_ecc_log.lock);
}

/*
 * Compute a 2-bit die id from address bits.
 *
 * @adev:         device handle (not used by this calculation)
 * @mca_addr:     MCA address; supplies bits 5 and 23 of the upper-bit hash
 * @retired_page: retired page address; supplies all remaining hash bits
 *
 * Each die id bit is the XOR (parity) of five selected address bits.
 * Returns a value in the range 0..3.
 */
static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev,
		uint64_t mca_addr, uint64_t retired_page)
{
	uint64_t lo_bit;
	uint64_t hi_bit;

	/* we only calculate die id for nps1 mode right now */
	lo_bit = (retired_page >> 12) ^
		 (retired_page >> 20) ^
		 (retired_page >> 27) ^
		 (retired_page >> 34) ^
		 (retired_page >> 41);
	/* XOR of the shifted words, then keep bit 0 == XOR of the single bits */
	lo_bit &= 0x1ULL;

	/* the original PA_C4 and PA_R13 may be cleared in retired_page, so
	 * take those two contributions from mca_addr instead.
	 */
	hi_bit = (retired_page >> 13) ^
		 (mca_addr >> 5) ^
		 (retired_page >> 28) ^
		 (mca_addr >> 23) ^
		 (retired_page >> 42);
	hi_bit &= 0x1ULL;

	/* lo_bit lands in bit 0 and hi_bit in bit 1, so the result is 0..3 */
	return (uint32_t)((hi_bit << 1) | lo_bit);
}

struct amdgpu_umc_ras umc_v12_0_ras = {
.ras_block = {
.hw_ops = &umc_v12_0_ras_hw_ops,
Expand All @@ -630,5 +655,6 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
.update_ecc_status = umc_v12_0_update_ecc_status,
.convert_ras_err_addr = umc_v12_0_convert_error_address,
.get_die_id_from_pa = umc_v12_0_get_die_id,
};

0 comments on commit fcb600b

Please sign in to comment.