Skip to content

next #151

Merged
merged 24 commits into from
Feb 17, 2024
Merged

next #151

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
bcd42ce
mxqsub: Don't fail with --umask 000
donald Dec 30, 2023
3732792
gpu-setup: Improve error message
donald Dec 30, 2023
fcf28d2
gpu-setup: Add some temporary debug messages
donald Dec 22, 2022
6003349
gpu-setup: Don't unlock too early during release
donald Dec 30, 2023
5d0de79
Makefile: Disable warning for Bison-generated source
donald Jan 1, 2024
57cc235
mxq_reaper: Use printf-like arguments for die()
donald Jan 1, 2024
b4b4d87
mxq_reaper: Retry on spool file write errors
donald Jan 1, 2024
9aee649
sql: create_tables: Add default character set to tables
donald Jan 1, 2024
1da6ccc
sql: Create indexes on group_jobs_inq and group_jobs_running
donald Jan 2, 2024
03dee12
mxqd: Remove surplus \n from log messages
donald Jan 5, 2024
f0b38f1
mx_mysql: mx_mysql_statement_close*: Allow NULL
donald Jan 5, 2024
dbd4f12
mx_getopt: Remove a NULL pointer check
donald Jan 5, 2024
e3d223c
mxqsub: Remove default time warning
donald Jan 10, 2024
87f7e98
mx_util: Add mx_die()
donald Jan 10, 2024
1a0fe04
tree: Use mx_die() instead of static die() function
donald Jan 10, 2024
1f1de79
Makefile: Add -Werror
donald Jan 10, 2024
5518f3f
tree: Handle mx_mysql_option_set_default_file errors
donald Jan 10, 2024
79c9787
mx_mysql: Add an unused_result warning
donald Jan 10, 2024
94fb3bc
mx_mysql: Refactor two functions into void functions
donald Jan 10, 2024
83cbbbd
mxqd: Make setup_cronolog() into void function
donald Jan 10, 2024
67c431b
test_mx_util: Call mx_call_external with argv != NULL
donald Jan 10, 2024
41b3f83
mx_util: Fix calloc argument order
donald Jan 10, 2024
2856013
mxqkill: Update usage string
donald Jan 10, 2024
98cf93c
mysql: Restore column group_flags
donald Jan 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ CFLAGS_MYSQL += -DMX_MYSQL_FAIL_WAIT_DEFAULT=5

CFLAGS += -g
CFLAGS += -O3
CFLAGS += -Wall -Wextra -Wno-override-init
CFLAGS += -Wall -Wextra -Wno-override-init -Werror
CFLAGS += -DMXQ_VERSION=\"${MXQ_VERSION}\"
CFLAGS += -DMXQ_VERSIONFULL=\"${MXQ_VERSIONFULL}\"
CFLAGS += -DMXQ_VERSIONDATE=\"${MXQ_VERSIONDATE}\"
Expand Down Expand Up @@ -361,6 +361,14 @@ ppidcache.h += ppidcache.h

########################################################################

## parser.tab.o

# Disable "variable 'yynerrs' set but not used" diagnostic which
# appears with bison-3.4.2 + llvm 15.0.4

parser.tab.o: parser.tab.c parser.tab.h
$(call quiet-command,${CC} ${CFLAGS} -Wno-unused-but-set-variable -o $@ -c $<," CC $@")

### mx_getopt.o --------------------------------------------------------

mx_getopt.o: $(mx_getopt.h)
Expand Down
10 changes: 8 additions & 2 deletions helper/gpu-setup
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ job_init() {
pid=$1
uid=$2

echo "XXX $$ job_init $pid: called" >&2

test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)"

shopt -s nullglob
Expand All @@ -253,6 +255,7 @@ job_init() {
esac
done
cat $d/uuid
echo "XXX $$ job_init $pid: allocated gpu from $d" >&2
exit
fi
done
Expand All @@ -263,12 +266,14 @@ job_release() {
(( $# == 1 )) || usage
pid=$1

echo "XXX $$ job_release $pid: called" >&2

test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)"
for d in /dev/shm/mxqd/gpu_devs/???; do
if [ -e $d/pid ]; then
test_pid="$(cat $d/pid 2>/dev/null)"
if [ "$pid" = "$test_pid" ]; then
rm $d/pid
echo "XXX $$ job_release $pid: found my pid in $d, releasing" >&2
for f in $(cat $d/access-files); do
case $f in
/dev/nvidia-caps/nvidia-cap*)
Expand All @@ -281,11 +286,12 @@ job_release() {
;;
esac
done
rm $d/pid
exit 0
fi
fi
done
die "$0: job_release: job with $pid has no GPU locked"
die "$0: job_release: job with pid $pid has no GPU locked"
}

show() {
Expand Down
3 changes: 1 addition & 2 deletions mx_getopt.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,7 @@ static int find_short_option(struct mx_option *options, char **name, char **opta

assert(short_opt);

if (optarg)
*optarg = NULL;
*optarg = NULL;

for (i=0, idx=-1; options[i].long_opt || options[i].short_opt; i++) {

Expand Down
15 changes: 6 additions & 9 deletions mx_mysql.c
Original file line number Diff line number Diff line change
Expand Up @@ -630,24 +630,21 @@ int mx_mysql_option_set_default_file(struct mx_mysql *mysql, char *fname)
return 0;
}

int mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group)
void mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group)
{
assert(mysql);

if (group && !(*group))
group = NULL;

mysql->default_group = group;

return 0;
}

int mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect)
void mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect)
{
assert(mysql);

mysql->reconnect = (bool)!!reconnect;
return 0;
}

static int mx_mysql_real_connect(struct mx_mysql *mysql, const char *host, const char *user, const char *passwd, const char *db, unsigned int port, const char *unix_socket, unsigned long client_flag)
Expand Down Expand Up @@ -1154,8 +1151,8 @@ struct mx_mysql_stmt *mx_mysql_statement_prepare(struct mx_mysql *mysql, char *s

int mx_mysql_statement_close(struct mx_mysql_stmt **stmt)
{
assert(stmt);
assert(*stmt);
if (*stmt == NULL)
return 0;

mx__mysql_stmt_free_result(*stmt);
mx__mysql_stmt_close(*stmt);
Expand All @@ -1169,8 +1166,8 @@ int mx_mysql_statement_close(struct mx_mysql_stmt **stmt)

int mx_mysql_statement_close_no_bind_cleanup(struct mx_mysql_stmt **stmt)
{
assert(stmt);
assert(*stmt);
if (*stmt == NULL)
return 0;

mx__mysql_stmt_free_result(*stmt);
mx__mysql_stmt_close(*stmt);
Expand Down
6 changes: 3 additions & 3 deletions mx_mysql.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@ struct mx_mysql_stmt {

int mx_mysql_initialize(struct mx_mysql **mysql);

int mx_mysql_option_set_default_file(struct mx_mysql *mysql, char *fname);
int mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group);
int mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect);
int mx_mysql_option_set_default_file(struct mx_mysql *mysql, char *fname) __attribute__ ((warn_unused_result));
void mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group);
void mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect);

int mx_mysql_connect(struct mx_mysql **mysql);
int mx_mysql_connect_forever_sec(struct mx_mysql **mysql, unsigned int seconds);
Expand Down
10 changes: 9 additions & 1 deletion mx_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,7 @@ char **mx_strvec_new(void)
{
char **strvec;

strvec = calloc(sizeof(*strvec), 1);
strvec = calloc(1, sizeof(*strvec));
if (!strvec)
return NULL;

Expand Down Expand Up @@ -1414,3 +1414,11 @@ char *mx_pipe_external(char *helper, char **argv) {
errno = err;
return NULL;
}

/*
 * Report a fatal error and terminate the process.
 *
 * fmt and the variadic arguments are interpreted printf-style and the
 * resulting message is written to stderr. No newline is appended, so the
 * caller supplies one in fmt if wanted.
 *
 * Does not return: the process ends via _exit() with status 1.
 */
void mx_die(const char *restrict fmt, ...) {
    va_list args;

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);

    _exit(1);
}
2 changes: 2 additions & 0 deletions mx_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ int mx_fs_get_sizes(const char *path, unsigned long *free, unsigned long *total)
time_t mx_clock_boottime(void);
int mx_call_external(char *helper, char **argv);
char *mx_pipe_external(char *args, char **argv);
void mx_die(const char *restrict fmt, ...) __attribute__ ((noreturn,format (printf, 1, 2)));


#if __GLIBC__ <2 || __GLIBC__ == 2 && __GLIBC_MINOR__ < 34
#include <sys/syscall.h>
Expand Down
106 changes: 64 additions & 42 deletions mxq_reaper.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@
#include <sys/wait.h>
#include <errno.h>
#include <string.h>
#include <stdarg.h>

static const char REAPER_PNAME[] = "mxqd reaper";

__attribute__((noreturn)) static void die(char *msg) {
perror(msg);
__attribute__ ((noreturn))
__attribute__ ((format (printf, 1, 2)))
static void die(const char *restrict fmt, ...) {
va_list ap;
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
_exit(1);
}

Expand All @@ -36,18 +42,18 @@ int main(int argc, char **argv) {
struct timeval endtime;

if (prctl(PR_SET_NAME, REAPER_PNAME, NULL, NULL, NULL) == -1)
die("PR_SET_NAME");
die("PR_SET_NAME: %m\n");
user_pid = fork();
if (user_pid == 0) {
if (setreuid(uid, uid) == -1)
die("setreuid");
die("setreuid: %m\n");
execvp(user_argv[0], user_argv);
die(user_argv[0]);
die("%s: %m\n", user_argv[0]);
}
if (user_pid == -1)
die("fork");
die("fork: %m\n");
if (gettimeofday(&starttime, NULL) == -1)
die("gettimeofday");
die("gettimeofday: %m\n");
while (1) {
int status;
pid_t pid = wait(&status);
Expand All @@ -57,48 +63,64 @@ int main(int argc, char **argv) {
user_status = status;
}
if (gettimeofday(&endtime, NULL) == -1)
die("gettimeofday");
die("gettimeofday: %m\n");
timersub(&endtime, &starttime, &user_time);
if (getrusage(RUSAGE_CHILDREN, &user_rusage) == -1)
die("getrusage");
die("getrusage: %m\n");

if (user_time.tv_sec<30) {
int wait=30-user_time.tv_sec;
sleep(wait);
}

char *tmpfilename;
if (asprintf(&tmpfilename, "%s.tmp", spoolfilename) == -1)
die("");

FILE *out = fopen(tmpfilename,"w");
if (out == NULL)
die(tmpfilename);
fprintf(out,"1 %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
getpid(),
user_status,
user_time.tv_sec, user_time.tv_usec,
user_rusage.ru_utime.tv_sec, user_rusage.ru_utime.tv_usec,
user_rusage.ru_stime.tv_sec, user_rusage.ru_stime.tv_usec,
user_rusage.ru_maxrss,
user_rusage.ru_ixrss,
user_rusage.ru_idrss,
user_rusage.ru_isrss,
user_rusage.ru_minflt,
user_rusage.ru_majflt,
user_rusage.ru_nswap,
user_rusage.ru_inblock,
user_rusage.ru_oublock,
user_rusage.ru_msgsnd,
user_rusage.ru_msgrcv,
user_rusage.ru_nsignals,
user_rusage.ru_nvcsw,
user_rusage.ru_nivcsw
);
fflush(out);
fsync(fileno(out));
fclose(out);
if (rename(tmpfilename, spoolfilename) == -1)
die(spoolfilename);
while (1) {
// If anything fails, do not write to stderr, as this is the user's
// stderr; just wait and retry.

char *tmpfilename;
if (asprintf(&tmpfilename, "%s.tmp", spoolfilename) == -1)
goto retry_1;

FILE *out = fopen(tmpfilename, "w");
if (out == NULL)
goto retry_2;
if (fprintf(out,"1 %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
getpid(),
user_status,
user_time.tv_sec, user_time.tv_usec,
user_rusage.ru_utime.tv_sec, user_rusage.ru_utime.tv_usec,
user_rusage.ru_stime.tv_sec, user_rusage.ru_stime.tv_usec,
user_rusage.ru_maxrss,
user_rusage.ru_ixrss,
user_rusage.ru_idrss,
user_rusage.ru_isrss,
user_rusage.ru_minflt,
user_rusage.ru_majflt,
user_rusage.ru_nswap,
user_rusage.ru_inblock,
user_rusage.ru_oublock,
user_rusage.ru_msgsnd,
user_rusage.ru_msgrcv,
user_rusage.ru_nsignals,
user_rusage.ru_nvcsw,
user_rusage.ru_nivcsw) < 0)
goto retry_3;
if (fflush(out) == EOF)
goto retry_3;
if (fsync(fileno(out)) == -1)
goto retry_3;
if (fclose(out) == EOF)
goto retry_2;
if (rename(tmpfilename, spoolfilename) == -1)
goto retry_2;
break;

retry_3:
fclose(out);
retry_2:
free(tmpfilename);
retry_1:
sleep(10);
}
return 0;
}
3 changes: 2 additions & 1 deletion mxqadmin.c
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,8 @@ int main(int argc, char *argv[])
res = mx_mysql_initialize(&mysql);
assert(res == 0);

mx_mysql_option_set_default_file(mysql, arg_mysql_default_file);
if (mx_mysql_option_set_default_file(mysql, arg_mysql_default_file) < 0)
mx_die("%s: %s\n", arg_mysql_default_file, mx_mysql_error());
mx_mysql_option_set_default_group(mysql, arg_mysql_default_group);

res = mx_mysql_connect_forever(&mysql);
Expand Down