From 4bad691003faa171a2ccaca1ef524f350f42aad9 Mon Sep 17 00:00:00 2001
From: Donald Buczek <buczek@molgen.mpg.de>
Date: Thu, 17 Feb 2022 15:04:44 +0100
Subject: [PATCH] cuda-help: Use flock

Currently the script assumes that it is never run multiple times in
parallel. That assumption does not hold: for job-init, the script is
called by the forked user process, so several instances can run at once.

Use flock to avoid GPU allocation races.
---
 helper/gpu-setup | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/helper/gpu-setup b/helper/gpu-setup
index a62abf0d..e2aff83c 100755
--- a/helper/gpu-setup
+++ b/helper/gpu-setup
@@ -179,13 +179,11 @@ job_init() {
     pid=$1
     uid=$2
 
-    # we have no locking here, but mxqd is single-threaded
-
     test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)"
 
     shopt -s nullglob
     for d in /dev/shm/mxqd/gpu_devs/???; do
-        if [ ! -e $d/pid ]; then
+        if pid=$pid f=$d/pid flock $d/pid -c 'test -s $f && exit 1; echo $pid>$f'; then
             for f in $(cat $d/access-files); do
                 case $f in
                     /dev/nvidia-caps/nvidia-cap*)
@@ -198,7 +196,6 @@ job_init() {
                         ;;
                 esac
             done
-            echo $pid > $d/pid
             cat $d/uuid
             exit
         fi