From 4bad691003faa171a2ccaca1ef524f350f42aad9 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Thu, 17 Feb 2022 15:04:44 +0100 Subject: [PATCH] cuda-help: Use flock Currently the script assumes that it is not called multiple times in parallel. This is not true, because for job-init it is called by the forked user process. Use flock to avoid GPU allocation races. --- helper/gpu-setup | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/helper/gpu-setup b/helper/gpu-setup index a62abf0d..e2aff83c 100755 --- a/helper/gpu-setup +++ b/helper/gpu-setup @@ -179,13 +179,11 @@ job_init() { pid=$1 uid=$2 - # we have no locking here, but mxqd is single-threaded - test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)" shopt -s nullglob for d in /dev/shm/mxqd/gpu_devs/???; do - if [ ! -e $d/pid ]; then + if pid=$pid f=$d/pid flock $d/pid -c 'test -s $f && exit 1; echo $pid>$f'; then for f in $(cat $d/access-files); do case $f in /dev/nvidia-caps/nvidia-cap*) @@ -198,7 +196,6 @@ job_init() { ;; esac done - echo $pid > $d/pid cat $d/uuid exit fi