diff --git a/Makefile b/Makefile index d898b93d..628def0e 100644 --- a/Makefile +++ b/Makefile @@ -347,6 +347,10 @@ mx_getopt.h += mx_getopt.h keywordset.h += keywordset.h +### ppidchache.h ------------------------------------------------------- + +ppidcache.h += ppidcache.h + ######################################################################## ### mx_getopt.o -------------------------------------------------------- @@ -489,6 +493,7 @@ mxqd.o: $(mxq_group.h) mxqd.o: $(mxq_job.h) mxqd.o: $(mx_mysql.h) mxqd.o: $(keywordset.h) +mxqd.o: $(ppidcache.h) mxqd.o: CFLAGS += $(CFLAGS_MYSQL) mxqd.o: CFLAGS += $(CFLAGS_MXQ_INITIAL_PATH) mxqd.o: CFLAGS += $(CFLAGS_MXQ_INITIAL_TMPDIR) @@ -522,6 +527,7 @@ clean: CLEAN += keywordset.o ### ppidcache.o ------------------------------------------------------- +ppidcache.o: $(ppidcache.h) ppidcache.o: $(mx_util.h) ppidcache.o: $(mx_proc.h) diff --git a/helper/gpu-setup b/helper/gpu-setup index da19fd76..d1ef1881 100755 --- a/helper/gpu-setup +++ b/helper/gpu-setup @@ -20,37 +20,12 @@ die() { exit 1 } -policy_a100_split_7() { - - # This policy splits a single A100 with 40 GB into 7 1g.5gb gpu instances each with a - # single default compute instance. - # - # The ID of the 1g.5gb profile is 19 (see `nvidia-smi mig -lgip`). - # The placements for profile 19 are 0..6 (see `nvidia-smi mig -lgipp`). - # We know that these placements create GPU instance IDs from 7 to 13 from - # the output of the `nvidia-smi mig --create-gpu-instance`commands. - # We know how the instance IDs map to minor device numbers from /proc/driver/nvidia-caps/mig-minors - # Note, that the minor numbers are only valid for gpu0, this is why this code is - # only suited for machines with a single GPU. - # - # PROFILE PLACEMENT GPU-INSTANCE-ID MINOR MINOR-COMPUTE-0 - # 1g.5gb 0 7 66 67 - # 1g.5gb 1 8 75 76 - # 1g.5gb 2 9 84 85 - # 1g.5gb 3 10 93 94 - # 1g.5gb 4 11 102 103 - # 1g.5gb 5 12 111 112 - # 1g.5gb 6 13 120 121 +common_setup_a100() { + gpu_uuid=$1 # Note: Nvidia regards persistence-mode as legacy.... - nvidia-smi --persistence-mode=1 - gpu_uuid=$(nvidia-smi --query-gpu=uuid --format=csv,noheader,nounits) - caps_major=$(grep ' nvidia-caps$' /proc/devices|cut -f 1 -d' ') - - test "$gpu_uuid" || die "GPU not found!" - nvidia-smi mig --destroy-compute-instance --id $gpu_uuid >/dev/null || true nvidia-smi mig --destroy-gpu-instance --id $gpu_uuid >/dev/null || true @@ -62,17 +37,20 @@ policy_a100_split_7() { umask 022 mkdir -p /dev/nvidia-caps - for i in $(seq 0 6);do - instance_id=$(($i+7)) - nvidia-smi mig --id $gpu_uuid --create-gpu-instance 1g.5gb:$i --default-compute-instance - done +} + +common_setup_a100_complete() { + gpu_uuid=$1 + caps_major=$(grep ' nvidia-caps$' /proc/devices|cut -f 1 -d' ') # calling `nvidia-smi --list-gpus` as root has the side effect of creating the cap files # for the gpu instances and the compute instances in /dev/nvidia-caps nvidia-smi --list-gpus - for i in $(seq 0 6);do - instance_id=$(($i+7)) + i=0 + for name in $(ls /proc/driver/nvidia/capabilities/gpu0/mig/|sort -k1.3n); do + [[ $name =~ ^gi([0-9]+)$ ]] || continue + instance_id=${BASH_REMATCH[1]} caps_minor=$(grep "^gpu0/gi$instance_id/access " /proc/driver/nvidia-caps/mig-minors|cut -f 2 -d ' ') caps_minor_compute=$(grep "^gpu0/gi$instance_id/ci0/access " /proc/driver/nvidia-caps/mig-minors|cut -f 2 -d ' ') d=$(printf "/dev/shm/mxqd/gpu_devs/%03d" $i) @@ -84,9 +62,61 @@ policy_a100_split_7() { chown root:root $f chmod go= $f done + i=$((i+1)) + done + + echo $i >&20 +} + +policy_a100_split_7() { + + # This policy splits a single A100 with 40 GB into 7 1g.5gb gpu instances each with a + # single default compute instance. + # + # The ID of the 1g.5gb profile is 19 (see `nvidia-smi mig -lgip`). + # The placements for profile 19 are 0..6 (see `nvidia-smi mig -lgipp`). + # + # Strangely, the gpu instance idents created seem to vary from hardware to hardware. + # With `for p in $(seq 0 6); do nvidia-smi mig -cgi 1g.5gb:$p;done` we've seen these gpu instances created: + # + # theredqueen: 7, 8 , 9, 10, 11, 12, 13 + # whiterabbit: 7, 8 , 9, 10, 11, 12, 13 + # bandersnatch: 7, 8 , 9, 10, 11, 12, 13 + # jabberwocky: 11, 12, 13, 14, 7, 8 , 9 + # + # So we walk through /proc/driver/nvidia/capabilities/gpu0/mig/gi* after we've + # created the gpu instances to find the gpu idents. + + gpu_uuid=$(nvidia-smi --query-gpu=uuid --format=csv,noheader,nounits) + test "$gpu_uuid" || die "GPU not found!" + + common_setup_a100 $gpu_uuid + + for i in $(seq 0 6);do + nvidia-smi mig --id $gpu_uuid --create-gpu-instance 1g.5gb:$i --default-compute-instance + done + + common_setup_a100_complete $gpu_uuid +} + +policy_a100_split_3() { + + # This policy splits a single A100 with 40 GB into 3 2g.10gb gpu instances each with a + # single default compute instance. + # + # The ID of the 2g.10gb profile is 14 (see `nvidia-smi mig -lgip`). + # The placements for profile 14 are 0,2,4 (see `nvidia-smi mig -lgipp`). + + gpu_uuid=$(nvidia-smi --query-gpu=uuid --format=csv,noheader,nounits) + test "$gpu_uuid" || die "GPU not found!" + + common_setup_a100 $gpu_uuid + + for i in $(seq 0 2 4);do + nvidia-smi mig --id $gpu_uuid --create-gpu-instance 2g.10gb:$i --default-compute-instance done - echo 7 >&20 + common_setup_a100_complete $gpu_uuid } policy_phys_gpus() { @@ -139,6 +169,9 @@ init() { a100-split-7) policy_a100_split_7 ;; + a100-split-3) + policy_a100_split_3 + ;; phys-gpus) policy_phys_gpus ;; diff --git a/mx_util.c b/mx_util.c index e8eef56e..8b675159 100644 --- a/mx_util.c +++ b/mx_util.c @@ -1306,7 +1306,7 @@ unsigned long mx_df(const char *path) { return s.f_bavail*s.f_frsize; } -time_t mx_clock_boottime() { +time_t mx_clock_boottime(void) { struct timespec ts; int res = clock_gettime(CLOCK_BOOTTIME, &ts); if (res != 0) { diff --git a/mx_util.h b/mx_util.h index 701eb850..e7971261 100644 --- a/mx_util.h +++ b/mx_util.h @@ -179,7 +179,7 @@ void _mx_sort_linked_list(void **list, int (*cmp)(void *o1,void *o2), void ** (* #define mx_sort_linked_list(list,cmp,getnextptr) _mx_sort_linked_list((void **)(list),(int (*)(void *,void *))(cmp),(void ** (*)(void *))(getnextptr)) unsigned long mx_df(const char *path); -time_t mx_clock_boottime(); +time_t mx_clock_boottime(void); char *mx_call_external(char *args, ...); #endif diff --git a/mxqd.c b/mxqd.c index a3ba9adf..32be11c0 100644 --- a/mxqd.c +++ b/mxqd.c @@ -81,9 +81,7 @@ static void print_usage(void) " -t, --max-time default: 0 (unlimited)\n" " --prerequisites default: ''\n" " -x, --max-memory-per-slot-soft \n" - " root user: default: /\n" - " non-root user: default: \n" - " --gpu\n" + " --gpu\n" "\n" " -X, --max-memory-per-slot-hard \n" " default: \n" @@ -336,7 +334,7 @@ static void read_hostconfig_retry(struct keywordset *kws) { static char gpu_setup_script[] = LIBEXECDIR "/mxq/gpu-setup"; -static int get_gpus() { +static int get_gpus(void) { char *line = mx_call_external(gpu_setup_script, "init", NULL); if (!line) { mx_log_err("gpu-setup init: %m"); diff --git a/mysql/create_tables.sql b/mysql/create_tables.sql index 3ff573f8..549b55a7 100644 --- a/mysql/create_tables.sql +++ b/mysql/create_tables.sql @@ -102,10 +102,6 @@ CREATE TABLE IF NOT EXISTS mxq_job ( date_start TIMESTAMP NOT NULL DEFAULT 0, date_end TIMESTAMP NOT NULL DEFAULT 0, - job_id_new INT8 UNSIGNED NULL DEFAULT NULL, - job_id_old INT8 UNSIGNED NULL DEFAULT NULL, - job_id_first INT8 UNSIGNED NULL DEFAULT NULL, - stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0, stats_status INT4 UNSIGNED NOT NULL DEFAULT 0, diff --git a/mysql/migrate_014_remove_job_id_fields.sql b/mysql/migrate_014_remove_job_id_fields.sql new file mode 100644 index 00000000..e01a96bd --- /dev/null +++ b/mysql/migrate_014_remove_job_id_fields.sql @@ -0,0 +1,4 @@ +ALTER TABLE mxq_job + DROP COLUMN job_id_new, + DROP COLUMN job_id_old, + DROP COLUMN job_id_first; diff --git a/ppidcache.c b/ppidcache.c index 11d95204..35c1de3d 100644 --- a/ppidcache.c +++ b/ppidcache.c @@ -16,7 +16,7 @@ struct ppidcache { struct entry *entries; }; -struct ppidcache *ppidcache_new() { +struct ppidcache *ppidcache_new(void) { struct ppidcache *ppidcache = mx_malloc_forever(sizeof(struct ppidcache)); ppidcache->count = 0; ppidcache->alloc = 500; diff --git a/ppidcache.h b/ppidcache.h index ca4a7e37..99e4bb22 100644 --- a/ppidcache.h +++ b/ppidcache.h @@ -3,7 +3,7 @@ #include -struct ppidcache *ppidcache_new(); +struct ppidcache *ppidcache_new(void); void ppidcache_free (struct ppidcache *ppidcache); pid_t ppidcache_get_ppid(struct ppidcache *ppidcache, pid_t pid); int ppidcache_is_descendant(struct ppidcache *ppidcache, pid_t ancestor, pid_t candidate); diff --git a/test_mx_util.c b/test_mx_util.c index 028a9ebb..6f473093 100644 --- a/test_mx_util.c +++ b/test_mx_util.c @@ -347,7 +347,7 @@ static void test_mx_strscan(void) mx_proc_pid_stat_free_content(pps); } -static void test_mx_strvec() { +static void test_mx_strvec(void) { char **strvec; char *str; @@ -395,7 +395,7 @@ static void test_mx_strvec() { free(strvec); } -static void test_mx_strcat() { +static void test_mx_strcat(void) { char *str; char *str2; @@ -517,11 +517,11 @@ static void test_listsort(void) assert(o[9].next==NULL); } -static void test_mx_df() { +static void test_mx_df(void) { assert(mx_df("/") > 0); } -static void test_mx_call_external() { +static void test_mx_call_external(void) { char *line; errno = 999; diff --git a/web/pages/mxq/mxq.in b/web/pages/mxq/mxq.in index 544497d7..f91d6918 100755 --- a/web/pages/mxq/mxq.in +++ b/web/pages/mxq/mxq.in @@ -623,10 +623,6 @@ date_submit : $o{date_submit} date_start : $o{date_start} $ago date_end : $o{date_end} $rt -job_id_new : $o{job_id_new} -job_id_old : $o{job_id_old} -job_id_first : $o{job_id_first} - stats_max_sumrss : $o{stats_max_sumrss} kiB stats_status : $o{stats_status} diff --git a/xmalloc.h b/xmalloc.h index 23705bdd..0e90e1e8 100644 --- a/xmalloc.h +++ b/xmalloc.h @@ -4,7 +4,7 @@ #include #include -__attribute__ ((noreturn, unused)) static void out_of_memory() { +__attribute__ ((noreturn, unused)) static void out_of_memory(void) { fprintf(stderr,"out of memory\n"); abort(); }