mariux64/mxq · helper/gpu-setup (commit e6ed782)
executable file · 339 lines (285 loc) · 9.66 KB
#! /bin/bash

usage() {
    cat <<EOF >&2
usage: $0 cmd

    $0 show               # Show state
    $0 release            # Release all GPUs, restore public access. Don't do this while mxqd or a job is running
    $0 init               # Called by mxqd: Set up hardware as needed, send number of GPUs to stdout
    $0 job-init PID UID   # Called by mxqd: Find a free GPU for job and user, send UUID to stdout
    $0 job-release PID    # Called by mxqd: Release the GPU of this job
EOF
    exit 1
}

die() {
    echo "$@" >&2
    exit 1
}

common_setup_a100() {
    gpu_uuid=$1

    # Note: Nvidia regards persistence-mode as legacy....
    nvidia-smi --persistence-mode=1

    nvidia-smi mig --destroy-compute-instance --id $gpu_uuid >/dev/null || true
    nvidia-smi mig --destroy-gpu-instance --id $gpu_uuid >/dev/null || true

    # This is stored in the card NVRAM. We set it anyway.
    # Switching requires a card reset.
    nvidia-smi --id=$gpu_uuid --multi-instance-gpu=1
    nvidia-smi --id=$gpu_uuid --gpu-reset

    umask 022
    mkdir -p /dev/nvidia-caps
}

common_setup_a100_complete() {
    gpu_uuid=$1

    caps_major=$(grep ' nvidia-caps$' /proc/devices|cut -f 1 -d' ')

    # Calling `nvidia-smi --list-gpus` as root has the side effect of creating the cap files
    # for the gpu instances and the compute instances in /dev/nvidia-caps
    nvidia-smi --list-gpus

    i=0
    for name in $(ls /proc/driver/nvidia/capabilities/gpu0/mig/|sort -k1.3n); do
        [[ $name =~ ^gi([0-9]+)$ ]] || continue
        instance_id=${BASH_REMATCH[1]}
        caps_minor=$(grep "^gpu0/gi$instance_id/access " /proc/driver/nvidia-caps/mig-minors|cut -f 2 -d ' ')
        caps_minor_compute=$(grep "^gpu0/gi$instance_id/ci0/access " /proc/driver/nvidia-caps/mig-minors|cut -f 2 -d ' ')
        d=$(printf "/dev/shm/mxqd/gpu_devs/%03d" $i)
        mkdir -p $d
        echo MIG-$gpu_uuid/$instance_id > $d/uuid
        access_files="/dev/nvidia-caps/nvidia-cap$caps_minor /dev/nvidia-caps/nvidia-cap$caps_minor_compute"
        echo $access_files > $d/access-files
        for f in $access_files; do
            chown root:root $f
            chmod go= $f
        done
        i=$((i+1))
    done
    # Report the number of prepared GPU slots on fd 20 (the original stdout saved by init).
    echo $i >&20
}

policy_a100_split_7() {
    # This policy splits a single A100 with 40 GB into 7 1g.5gb gpu instances, each with a
    # single default compute instance.
    #
    # The ID of the 1g.5gb profile is 19 (see `nvidia-smi mig -lgip`).
    # The placements for profile 19 are 0..6 (see `nvidia-smi mig -lgipp`).
    #
    # Strangely, the gpu instance idents created seem to vary from hardware to hardware.
    # With `for p in $(seq 0 6); do nvidia-smi mig -cgi 1g.5gb:$p;done` we've seen these gpu instances created:
    #
    #   theredqueen:   7,  8,  9, 10, 11, 12, 13
    #   whiterabbit:   7,  8,  9, 10, 11, 12, 13
    #   bandersnatch:  7,  8,  9, 10, 11, 12, 13
    #   jabberwocky:  11, 12, 13, 14,  7,  8,  9
    #
    # So we walk through /proc/driver/nvidia/capabilities/gpu0/mig/gi* after we've
    # created the gpu instances to find the gpu idents.

    gpu_uuid=$(nvidia-smi --query-gpu=uuid --format=csv,noheader,nounits)
    test "$gpu_uuid" || die "GPU not found!"

    common_setup_a100 $gpu_uuid
    for i in $(seq 0 6); do
        nvidia-smi mig --id $gpu_uuid --create-gpu-instance 1g.5gb:$i --default-compute-instance
    done
    common_setup_a100_complete $gpu_uuid
}

policy_a100_split_3() {
    # This policy splits a single A100 with 40 GB into 3 2g.10gb gpu instances, each with a
    # single default compute instance.
    #
    # The ID of the 2g.10gb profile is 14 (see `nvidia-smi mig -lgip`).
    # The placements for profile 14 are 0,2,4 (see `nvidia-smi mig -lgipp`).

    gpu_uuid=$(nvidia-smi --query-gpu=uuid --format=csv,noheader,nounits)
    test "$gpu_uuid" || die "GPU not found!"

    common_setup_a100 $gpu_uuid
    for i in $(seq 0 2 4); do
        nvidia-smi mig --id $gpu_uuid --create-gpu-instance 2g.10gb:$i --default-compute-instance
    done
    common_setup_a100_complete $gpu_uuid
}

policy_a100_split_2() {
    # This policy splits a single A100 with 40 GB into 2 3g.20gb gpu instances, each with a
    # single default compute instance.
    #
    # The ID of the 3g.20gb profile is 9 (see `nvidia-smi mig -lgip`).
    # The placements for profile 9 are 0,4 (see `nvidia-smi mig -lgipp`).

    gpu_uuid=$(nvidia-smi --query-gpu=uuid --format=csv,noheader,nounits)
    test "$gpu_uuid" || die "GPU not found!"

    common_setup_a100 $gpu_uuid
    for i in 0 4; do
        nvidia-smi mig --id $gpu_uuid --create-gpu-instance 3g.20gb:$i --default-compute-instance
    done
    common_setup_a100_complete $gpu_uuid
}

policy_phys_gpus() {
    # This policy just enumerates the physical GPUs and reserves them
    # all for mxqd.
    #
    # Note: We assume that we are getting the cards from nvidia-smi in the order
    # corresponding to the /dev/nvidiaNNN files enumeration.

    # Note: Nvidia regards persistence-mode as legacy....
    nvidia-smi --persistence-mode=1

    # Create the device files
    nvidia-smi --list-gpus

    i=0
    for gpu_uuid in $(nvidia-smi --query-gpu=uuid --format=csv,noheader,nounits); do
        d=$(printf "/dev/shm/mxqd/gpu_devs/%03d" $i)
        mkdir -p $d
        echo $gpu_uuid > $d/uuid
        echo /dev/nvidia$i > $d/access-files
        chown root:root /dev/nvidia$i
        chmod go= /dev/nvidia$i
        i=$((i+1))
    done
    # Report the number of prepared GPU slots on fd 20 (the original stdout saved by init).
    echo $i >&20
}

init() {
    set -e
    if [ -d /dev/shm/mxqd/gpu_devs/ ]; then
        echo /dev/shm/mxqd/gpu_devs/??? | wc -w
        exit
    fi
    policy="$(/etc/mxq/gpu-policy)"
    if [ -z "$policy" ]; then
        die "$0: no policy returned for $(hostname) from /etc/mxq/gpu-policy"
    fi

    # We can't trust nvidia-smi not to write to stdout,
    # so save stdout on fd 20 and redirect stdout to stderr.
    exec 20>&1
    exec >&2

    case "$policy" in
        a100-split-7)
            policy_a100_split_7
            ;;
        a100-split-3)
            policy_a100_split_3
            ;;
        a100-split-2)
            policy_a100_split_2
            ;;
        phys-gpus)
            policy_phys_gpus
            ;;
        *)
            die "$0: policy returned by /etc/mxq/gpu-policy not known"
            ;;
    esac
}

release() {
    set -e
    for d in /dev/shm/mxqd/gpu_devs/???; do
        if [ -e $d/pid ] && kill -0 $(cat $d/pid) 2>/dev/null; then
            echo "WARNING: job $(cat $d/pid) still active. GPU $(cat $d/uuid) not returned"
        else
            if [ -e $d/access-files ]; then
                for f in $(cat $d/access-files); do
                    case $f in
                        /dev/nvidia-caps/nvidia-cap*)
                            chown root:root $f
                            chmod 0444 $f
                            ;;
                        /dev/nvidia*)
                            chown root:root $f
                            chmod 0666 $f
                            ;;
                    esac
                done
            fi
            rm -rf $d
        fi
    done
    rmdir /dev/shm/mxqd/gpu_devs 2>/dev/null || true
}

job_init() {
    (( $# == 2 )) || usage
    pid=$1
    uid=$2
    echo "XXX $$ job_init $pid: called" >&2
    test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)"
    shopt -s nullglob
    for d in /dev/shm/mxqd/gpu_devs/???; do
        # Atomically claim this slot: under flock, write our pid only if the pid file is still empty.
        if pid=$pid f=$d/pid flock $d/pid -c 'test -s $f && exit 1; echo $pid>$f'; then
            for f in $(cat $d/access-files); do
                case $f in
                    /dev/nvidia-caps/nvidia-cap*)
                        chown $uid:root $f
                        chmod 0400 $f
                        ;;
                    /dev/nvidia*)
                        chown $uid:root $f
                        chmod 0600 $f
                        ;;
                esac
            done
            cat $d/uuid
            echo "XXX $$ job_init $pid: allocated gpu from $d" >&2
            exit
        fi
    done
    die "$0: logic error: no free GPUs!"
}

job_release() {
    (( $# == 1 )) || usage
    pid=$1
    echo "XXX $$ job_release $pid: called" >&2
    test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)"
    for d in /dev/shm/mxqd/gpu_devs/???; do
        if [ -e $d/pid ]; then
            test_pid="$(cat $d/pid 2>/dev/null)"
            if [ "$pid" = "$test_pid" ]; then
                echo "XXX $$ job_release $pid: found my pid in $d, releasing" >&2
                rm $d/pid
                for f in $(cat $d/access-files); do
                    case $f in
                        /dev/nvidia-caps/nvidia-cap*)
                            chown root:root $f
                            chmod 0400 $f
                            ;;
                        /dev/nvidia*)
                            chown root:root $f
                            chmod 0600 $f
                            ;;
                    esac
                done
                exit 0
            fi
        fi
    done
    die "$0: job_release: job with $pid has no GPU locked"
}

show() {
    if [ ! -d /dev/shm/mxqd/gpu_devs ]; then
        echo Not initialized
        exit
    fi
    devices=0
    shopt -s nullglob
    for d in /dev/shm/mxqd/gpu_devs/???; do
        uuid=$(cat $d/uuid)
        if [ -e $d/pid ]; then
            job_id=$(cat $d/pid)
        else
            job_id="avail"
        fi
        access_files=$(cat $d/access-files)
        printf "%2d: %10s %s %s\n" $devices $job_id $uuid "$access_files"
        devices=$((devices+1))
    done
}

(( $# > 0 )) || usage

cmd="$1"
shift

case "$cmd" in
    init)
        init
        ;;
    release)
        release
        ;;
    job-init)
        job_init "$@"
        ;;
    job-release)
        job_release "$@"
        ;;
    show)
        show
        ;;
    *)
        usage
        ;;
esac
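
The init subcommand reads the policy name for the local host from /etc/mxq/gpu-policy. That helper is not part of this file; what follows is a minimal hypothetical sketch, assuming it only needs to print one of the policy names handled by the case statement above (a100-split-7, a100-split-3, a100-split-2, phys-gpus). The hostname-to-policy mapping shown is illustrative and not taken from the repository.

#! /bin/bash
# /etc/mxq/gpu-policy (hypothetical sketch): print the GPU policy name for this host.
# gpu-setup init captures the single word printed here and dispatches on it.
case "$(hostname)" in
    theredqueen|whiterabbit|bandersnatch|jabberwocky)
        # assumed: hosts with a single A100 40 GB, split into 7 1g.5gb instances
        echo a100-split-7
        ;;
    *)
        # default: reserve whole physical GPUs for mxqd
        echo phys-gpus
        ;;
esac

With such a policy helper in place, the call sequence expected by the usage text is: mxqd runs "gpu-setup init" once at daemon start (the number of GPU slots comes back on stdout), "gpu-setup job-init PID UID" per GPU job (the instance UUID comes back on stdout for the caller to hand to the job), and "gpu-setup job-release PID" when the job ends. "gpu-setup show" lists the slots and their owners, and "gpu-setup release" restores public device access once mxqd and all jobs have stopped.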