diff --git a/install.sh b/install.sh
index 17e909b..14d4f19 100755
--- a/install.sh
+++ b/install.sh
@@ -264,6 +264,8 @@ for f in libexec_startup/*; do
 done
 install_data misc_etc_files/request-key.d/id_resolver.conf \
 	"$DESTDIR$sysconfdir/request-key.d/id_resolver.conf"
+install_data misc_etc_files/modprobe.d/amdgpu_gpu_recovery.conf \
+	"$DESTDIR$sysconfdir/modprobe.d/amdgpu_gpu_recovery.conf"
 
 postinstall
 exit
diff --git a/misc_etc_files/modprobe.d/amdgpu_gpu_recovery.conf b/misc_etc_files/modprobe.d/amdgpu_gpu_recovery.conf
new file mode 100644
index 0000000..424e2c2
--- /dev/null
+++ b/misc_etc_files/modprobe.d/amdgpu_gpu_recovery.conf
@@ -0,0 +1,18 @@
+# Sometimes we have this on "fenchurch":
+#
+# kernel: [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx timeout, signaled seq=16634335, emitted seq=16634337
+# kernel: [drm:amdgpu_job_timedout [amdgpu]] *ERROR* Process information: process Xorg pid 718 thread Xorg:cs0 pid 719
+# kernel: amdgpu 0000:01:00.0: amdgpu: GPU recovery disabled.
+#
+# From drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c:
+#
+# /**
+#  * DOC: gpu_recovery (int)
+#  * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
+#  */
+# MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");
+# module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+#
+# So give it a try
+
+options amdgpu gpu_recovery=1