Skip to content

Commit

Permalink
drm/amdkfd: Workaround SQC store failure in gfx9 trap handler
Browse files Browse the repository at this point in the history
SQC stores may intermittently write incorrect data under concurrency
when module parameter noretry=1. This can cause failed context
save/restore cycles as the wavefront state is saved incorrectly.

Within each wavefront, wait for each SQC store to be acknowledged
before issuing the next one.

Change-Id: I6539e2adc156a1a3a057ea3cf21b16669dd699ae
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
  • Loading branch information
Jay Cornwall authored and Felix Kuehling committed Feb 12, 2018
1 parent 8cdadac commit 57c9bff
Showing 1 changed file with 48 additions and 25 deletions.
73 changes: 48 additions & 25 deletions drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code
var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes
var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency: when set, an s_waitcnt lgkmcnt(0) is emitted after each s_buffer_store so a wavefront waits for one store before issuing the next

/**************************************************************************/
/* variables */
Expand Down Expand Up @@ -1089,6 +1090,9 @@ function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
s_mov_b32 m0, s_mem_offset
s_buffer_store_dword s, s_rsrc, m0 glc:1
if ACK_SQC_STORE
s_waitcnt lgkmcnt(0)
end
s_add_u32 s_mem_offset, s_mem_offset, 4
s_mov_b32 m0, exec_lo
end
Expand All @@ -1098,9 +1102,21 @@ end
// write_16sgpr_to_mem: store s[0]..s[15] through the buffer resource s_rsrc
// as four dwordx4 stores at byte offsets 0, 16, 32 and 48, then advance the
// resource by 64 bytes so the next call writes the following 16 dwords.
//   s            - SGPR array to save; indexed as s[0], s[4], s[8], s[12]
//   s_rsrc       - buffer resource used as the store target; s_rsrc[0] and
//                  s_rsrc[1] are modified (presumably the low/high dwords of
//                  the base address -- confirm against the resource setup)
//   s_mem_offset - not referenced in this body; offsets are the immediates below
// When ACK_SQC_STORE is set, each store is followed by s_waitcnt lgkmcnt(0)
// so the store is waited on before the next one is issued (SQC store workaround).
function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)

// s[0..3] -> offset 0
s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
if ACK_SQC_STORE
s_waitcnt lgkmcnt(0)
end
// s[4..7] -> offset 16
s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
if ACK_SQC_STORE
s_waitcnt lgkmcnt(0)
end
// s[8..11] -> offset 32
s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
if ACK_SQC_STORE
s_waitcnt lgkmcnt(0)
end
// s[12..15] -> offset 48
s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
if ACK_SQC_STORE
s_waitcnt lgkmcnt(0)
end
// Advance s_rsrc by 16 dwords (4*16 = 64 bytes) as a two-dword add with carry.
s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc: add the carry out of the s_rsrc[0] addition
end
Expand Down Expand Up @@ -1145,7 +1161,7 @@ end
#endif

static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbf820001, 0xbf820128,
0xbf820001, 0xbf820136,
0xb8f0f802, 0x89708670,
0xb8f1f803, 0x8674ff71,
0x00000400, 0xbf850021,
Expand Down Expand Up @@ -1196,35 +1212,40 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbef60084, 0xbef600ff,
0x01000000, 0xbefe007c,
0xbefc007a, 0xc0611efa,
0x0000007c, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611b3a,
0x0000007c, 0x807a847a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611b3a, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611b7a,
0x0000007c, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611bba,
0x0000007c, 0x807a847a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611bba, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611bfa,
0x0000007c, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611c3a,
0x0000007c, 0x807a847a,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611c3a, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xb8f1f803,
0xbefe007c, 0xbefc007a,
0xc0611c7a, 0x0000007c,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611cba, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xbefe007c,
0xbefc007a, 0xc0611cba,
0x0000007c, 0xbf8cc07f,
0x807a847a, 0xbefc007e,
0xbefe007c, 0xbefc007a,
0xc0611cfa, 0x0000007c,
0x807a847a, 0xbefc007e,
0xb8fbf801, 0xbefe007c,
0xbefc007a, 0xc0611efa,
0x0000007c, 0x807a847a,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0xb8fbf801,
0xbefe007c, 0xbefc007a,
0xc0611efa, 0x0000007c,
0xbf8cc07f, 0x807a847a,
0xbefc007e, 0x8676ff7f,
0x04000000, 0xbeef0080,
0x876f6f76, 0xb8fa2a05,
Expand All @@ -1239,12 +1260,14 @@ static const uint32_t cwsr_trap_gfx9_hex[] = {
0xbe862b06, 0xbe882b08,
0xbe8a2b0a, 0xbe8c2b0c,
0xbe8e2b0e, 0xc06b003a,
0x00000000, 0xc06b013a,
0x00000010, 0xc06b023a,
0x00000020, 0xc06b033a,
0x00000030, 0x8074c074,
0x00000000, 0xbf8cc07f,
0xc06b013a, 0x00000010,
0xbf8cc07f, 0xc06b023a,
0x00000020, 0xbf8cc07f,
0xc06b033a, 0x00000030,
0xbf8cc07f, 0x8074c074,
0x82758075, 0x807c907c,
0xbf0a717c, 0xbf85ffeb,
0xbf0a717c, 0xbf85ffe7,
0xbef40172, 0xbefa0080,
0xbefe00c1, 0xbeff00c1,
0xbef600ff, 0x01000000,
Expand Down

0 comments on commit 57c9bff

Please sign in to comment.