Skip to content

Commit

Permalink
drm/amdkfd: Sign-extend TMA address in trap handler
Browse files Browse the repository at this point in the history
SMEM instructions can reach addresses above 47 bits but require
bit 47 to be sign-extended through bits [63:48].

This allows the TMA to be relocated in a following patch.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
  • Loading branch information
Jay Cornwall authored and Alex Deucher committed Aug 7, 2023
1 parent 96c211f commit 05c899e
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 22 deletions.
58 changes: 36 additions & 22 deletions drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,14 +274,14 @@ static const uint32_t cwsr_trap_gfx8_hex[] = {


static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf820001, 0xbf820254,
0xbf820001, 0xbf820258,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
0xbf850051, 0xbf8e0010,
0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
Expand All @@ -294,13 +294,15 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036,
0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
Expand Down Expand Up @@ -676,14 +678,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
};

static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf820001, 0xbf8201f1,
0xbf820001, 0xbf8201f5,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b,
0x00000400, 0xbf850057,
0x00000400, 0xbf85005b,
0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015,
Expand All @@ -697,7 +699,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003,
0x876eff7b, 0x00000400,
0xbf85003c, 0x8a77ff77,
0xbf850040, 0x8a77ff77,
0xff000000, 0xb97af807,
0x877bff7a, 0x02000000,
0x8f7b867b, 0x88777b77,
Expand All @@ -706,6 +708,8 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0x8a7aff7a, 0x023f8000,
0xb9faf807, 0xb97af812,
0xb97bf813, 0x8ffa887a,
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000,
Expand Down Expand Up @@ -1094,14 +1098,14 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
};

static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf820001, 0xbf8202d0,
0xbf820001, 0xbf8202d4,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
0xbf850051, 0xbf8e0010,
0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
Expand All @@ -1114,13 +1118,15 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036,
0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
Expand Down Expand Up @@ -1572,14 +1578,14 @@ static const uint32_t cwsr_trap_arcturus_hex[] = {
};

static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbf820001, 0xbf8202db,
0xbf820001, 0xbf8202df,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001e,
0x866eff7b, 0x00000400,
0xbf850051, 0xbf8e0010,
0xbf850055, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850015, 0x866eff7b,
Expand All @@ -1592,13 +1598,15 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
0xbf850007, 0xb8eef801,
0x866eff6e, 0x00000800,
0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036,
0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8977ff77, 0xfc000000,
0x87777a77, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8977ff77, 0x00800000,
Expand Down Expand Up @@ -2061,14 +2069,14 @@ static const uint32_t cwsr_trap_aldebaran_hex[] = {
};

static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf820001, 0xbf82021c,
0xbf820001, 0xbf820220,
0xb0804004, 0xb978f802,
0x8a78ff78, 0x00020006,
0xb97bf803, 0x876eff78,
0x00002000, 0xbf840009,
0x876eff6d, 0x00ff0000,
0xbf85001e, 0x876eff7b,
0x00000400, 0xbf850041,
0x00000400, 0xbf850045,
0xbf8e0010, 0xb97bf803,
0xbf82fffa, 0x876eff7b,
0x00000900, 0xbf850015,
Expand All @@ -2082,8 +2090,10 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xb96ef801, 0x876eff6e,
0x00000800, 0xbf850003,
0x876eff7b, 0x00000400,
0xbf850026, 0xb97af812,
0xbf85002a, 0xb97af812,
0xb97bf813, 0x8ffa887a,
0xbf0d8f7b, 0xbf840002,
0x887bff7b, 0xffff0000,
0xf4011bbd, 0xfa000010,
0xbf8cc07f, 0x8f6e976e,
0x8a77ff77, 0x00800000,
Expand Down Expand Up @@ -2496,7 +2506,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
};

static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa00001, 0xbfa00221,
0xbfa00001, 0xbfa00225,
0xb0804006, 0xb8f8f802,
0x9178ff78, 0x00020006,
0xb8fbf803, 0xbf0d9e6d,
Expand All @@ -2506,7 +2516,7 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa10009, 0x8b6eff6d,
0x00ff0000, 0xbfa2001e,
0x8b6eff7b, 0x00000400,
0xbfa20041, 0xbf830010,
0xbfa20045, 0xbf830010,
0xb8fbf803, 0xbfa0fffa,
0x8b6eff7b, 0x00000900,
0xbfa20015, 0x8b6eff7b,
Expand All @@ -2519,9 +2529,11 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
0xbfa20007, 0xb8eef801,
0x8b6eff6e, 0x00000800,
0xbfa20003, 0x8b6eff7b,
0x00000400, 0xbfa20026,
0x00000400, 0xbfa2002a,
0xbefa4d82, 0xbf89fc07,
0x84fa887a, 0xf4005bbd,
0x84fa887a, 0xbf0d8f7b,
0xbfa10002, 0x8c7bff7b,
0xffff0000, 0xf4005bbd,
0xf8000010, 0xbf89fc07,
0x846e976e, 0x9177ff77,
0x00800000, 0x8c776e77,
Expand Down Expand Up @@ -2939,14 +2951,14 @@ static const uint32_t cwsr_trap_gfx11_hex[] = {
};

static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0xbf820001, 0xbf8202d7,
0xbf820001, 0xbf8202db,
0xb8f8f802, 0x8978ff78,
0x00020006, 0xb8fbf803,
0x866eff78, 0x00002000,
0xbf840009, 0x866eff6d,
0x00ff0000, 0xbf85001a,
0x866eff7b, 0x00000400,
0xbf85004d, 0xbf8e0010,
0xbf850051, 0xbf8e0010,
0xb8fbf803, 0xbf82fffa,
0x866eff7b, 0x03c00900,
0xbf850011, 0x866eff7b,
Expand All @@ -2957,13 +2969,15 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
0x866e6f6e, 0xbf850006,
0x866eff6d, 0x00ff0000,
0xbf850003, 0x866eff7b,
0x00000400, 0xbf850036,
0x00000400, 0xbf85003a,
0xb8faf807, 0x867aff7a,
0x001f8000, 0x8e7a8b7a,
0x8979ff79, 0xfc000000,
0x87797a79, 0xba7ff807,
0x00000000, 0xb8faf812,
0xb8fbf813, 0x8efa887a,
0xbf0d8f7b, 0xbf840002,
0x877bff7b, 0xffff0000,
0xc0031bbd, 0x00000010,
0xbf8cc07f, 0x8e6e976e,
0x8979ff79, 0x00800000,
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,11 @@ L_FETCH_2ND_TRAP:
#endif
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8

// Sign-extend the TMA address held in [ttmp14, ttmp15] before it is used
// as an SMEM base: SMEM requires bit 47 to be replicated through bits [63:48].
// Assuming ttmp15 is the upper dword of the pair, bit 0xF of ttmp15 is bit 47
// of the 64-bit address and mask 0xFFFF0000 covers bits [63:48].
s_bitcmp1_b32 ttmp15, 0xF                    // SCC = 1 if address bit 47 is set
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA          // bit 47 clear: leave ttmp15 unchanged
s_or_b32 ttmp15, ttmp15, 0xFFFF0000          // bit 47 set: force bits [63:48] to ones
L_NO_SIGN_EXTEND_TMA:

s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,11 @@ L_FETCH_2ND_TRAP:
s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI)
s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8

// Sign-extend the TMA address held in [ttmp14, ttmp15] before it is used
// as an SMEM base: SMEM requires bit 47 to be replicated through bits [63:48].
// ttmp15 was loaded from SQ_SHADER_TMA_HI and shifted as the upper dword of the
// pair, so bit 0xF of ttmp15 is bit 47 of the 64-bit address and mask
// 0xFFFF0000 covers bits [63:48].
s_bitcmp1_b32 ttmp15, 0xF                    // SCC = 1 if address bit 47 is set
s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA          // bit 47 clear: leave ttmp15 unchanged
s_or_b32 ttmp15, ttmp15, 0xFFFF0000          // bit 47 set: force bits [63:48] to ones
L_NO_SIGN_EXTEND_TMA:

s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 glc:1 // debug trap enabled flag
s_waitcnt lgkmcnt(0)
s_lshl_b32 ttmp2, ttmp2, TTMP_DEBUG_TRAP_ENABLED_SHIFT
Expand Down

0 comments on commit 05c899e

Please sign in to comment.