diff --git a/Makefile b/Makefile index 3226b64c..fe694501 100644 --- a/Makefile +++ b/Makefile @@ -122,7 +122,7 @@ CFLAGS_MYSQL += -DMX_MYSQL_FAIL_WAIT_DEFAULT=5 CFLAGS += -g CFLAGS += -O3 -CFLAGS += -Wall -Wextra -Wno-override-init +CFLAGS += -Wall -Wextra -Wno-override-init -Werror CFLAGS += -DMXQ_VERSION=\"${MXQ_VERSION}\" CFLAGS += -DMXQ_VERSIONFULL=\"${MXQ_VERSIONFULL}\" CFLAGS += -DMXQ_VERSIONDATE=\"${MXQ_VERSIONDATE}\" @@ -361,6 +361,14 @@ ppidcache.h += ppidcache.h ######################################################################## +## parser.tab.o + +# Disable "variable 'yynerrs' set but not used" diagnostic which +# appears with bison-3.4.2 + llvm 15.0.4 + +parser.tab.o: parser.tab.c parser.tab.h + $(call quiet-command,${CC} ${CFLAGS} -Wno-unused-but-set-variable -o $@ -c $<," CC $@") + ### mx_getopt.o -------------------------------------------------------- mx_getopt.o: $(mx_getopt.h) diff --git a/helper/gpu-setup b/helper/gpu-setup index 9fc50e12..a0413fb3 100755 --- a/helper/gpu-setup +++ b/helper/gpu-setup @@ -235,6 +235,8 @@ job_init() { pid=$1 uid=$2 + echo "XXX $$ job_init $pid: called" >&2 + test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)" shopt -s nullglob @@ -253,6 +255,7 @@ job_init() { esac done cat $d/uuid + echo "XXX $$ job_init $pid: allocated gpu from $d" >&2 exit fi done @@ -263,12 +266,14 @@ job_release() { (( $# == 1 )) || usage pid=$1 + echo "XXX $$ job_release $pid: called" >&2 + test -d /dev/shm/mxqd/gpu_devs || die "$0: Not initialized (no dir /dev/shm/mxqd/gpu_devs)" for d in /dev/shm/mxqd/gpu_devs/???; do if [ -e $d/pid ]; then test_pid="$(cat $d/pid 2>/dev/null)" if [ "$pid" = "$test_pid" ]; then - rm $d/pid + echo "XXX $$ job_release $pid: found my pid in $d, releasing" >&2 for f in $(cat $d/access-files); do case $f in /dev/nvidia-caps/nvidia-cap*) @@ -281,11 +286,12 @@ job_release() { ;; esac done + rm $d/pid exit 0 fi fi done - die "$0: job_release: job with $pid has no GPU locked" + die "$0: job_release: job with pid $pid has no GPU locked" } show() { diff --git a/mx_getopt.c b/mx_getopt.c index 1ef82c20..0f47b35c 100644 --- a/mx_getopt.c +++ b/mx_getopt.c @@ -127,8 +127,7 @@ static int find_short_option(struct mx_option *options, char **name, char **opta assert(short_opt); - if (optarg) - *optarg = NULL; + *optarg = NULL; for (i=0, idx=-1; options[i].long_opt || options[i].short_opt; i++) { diff --git a/mx_mysql.c b/mx_mysql.c index 686eb0c4..ca983b04 100644 --- a/mx_mysql.c +++ b/mx_mysql.c @@ -630,7 +630,7 @@ int mx_mysql_option_set_default_file(struct mx_mysql *mysql, char *fname) return 0; } -int mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group) +void mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group) { assert(mysql); @@ -638,16 +638,13 @@ int mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group) group = NULL; mysql->default_group = group; - - return 0; } -int mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect) +void mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect) { assert(mysql); mysql->reconnect = (bool)!!reconnect; - return 0; } static int mx_mysql_real_connect(struct mx_mysql *mysql, const char *host, const char *user, const char *passwd, const char *db, unsigned int port, const char *unix_socket, unsigned long client_flag) @@ -1154,8 +1151,8 @@ struct mx_mysql_stmt *mx_mysql_statement_prepare(struct mx_mysql *mysql, char *s int mx_mysql_statement_close(struct mx_mysql_stmt **stmt) { - assert(stmt); - assert(*stmt); + if (*stmt == NULL) + return 0; mx__mysql_stmt_free_result(*stmt); mx__mysql_stmt_close(*stmt); @@ -1169,8 +1166,8 @@ int mx_mysql_statement_close(struct mx_mysql_stmt **stmt) int mx_mysql_statement_close_no_bind_cleanup(struct mx_mysql_stmt **stmt) { - assert(stmt); - assert(*stmt); + if (*stmt == NULL) + return 0; mx__mysql_stmt_free_result(*stmt); mx__mysql_stmt_close(*stmt); diff --git a/mx_mysql.h b/mx_mysql.h index 17f5a06b..173e48a7 100644 --- a/mx_mysql.h +++ b/mx_mysql.h @@ -77,9 +77,9 @@ struct mx_mysql_stmt { int mx_mysql_initialize(struct mx_mysql **mysql); -int mx_mysql_option_set_default_file(struct mx_mysql *mysql, char *fname); -int mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group); -int mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect); +int mx_mysql_option_set_default_file(struct mx_mysql *mysql, char *fname) __attribute__ ((warn_unused_result)); +void mx_mysql_option_set_default_group(struct mx_mysql *mysql, char *group); +void mx_mysql_option_set_reconnect(struct mx_mysql *mysql, int reconnect); int mx_mysql_connect(struct mx_mysql **mysql); int mx_mysql_connect_forever_sec(struct mx_mysql **mysql, unsigned int seconds); diff --git a/mx_util.c b/mx_util.c index c7d5182e..824a08cb 100644 --- a/mx_util.c +++ b/mx_util.c @@ -931,7 +931,7 @@ char **mx_strvec_new(void) { char **strvec; - strvec = calloc(sizeof(*strvec), 1); + strvec = calloc(1, sizeof(*strvec)); if (!strvec) return NULL; @@ -1414,3 +1414,11 @@ char *mx_pipe_external(char *helper, char **argv) { errno = err; return NULL; } + +void mx_die(const char *restrict fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + _exit(1); +} diff --git a/mx_util.h b/mx_util.h index f64a6964..8baad023 100644 --- a/mx_util.h +++ b/mx_util.h @@ -183,6 +183,8 @@ int mx_fs_get_sizes(const char *path, unsigned long *free, unsigned long *total) time_t mx_clock_boottime(void); int mx_call_external(char *helper, char **argv); char *mx_pipe_external(char *args, char **argv); +void mx_die(const char *restrict fmt, ...) __attribute__ ((noreturn,format (printf, 1, 2))); + #if __GLIBC__ <2 || __GLIBC__ == 2 && __GLIBC_MINOR__ < 34 #include diff --git a/mxq_reaper.c b/mxq_reaper.c index f843980a..7bca9e19 100644 --- a/mxq_reaper.c +++ b/mxq_reaper.c @@ -8,11 +8,17 @@ #include #include #include +#include static const char REAPER_PNAME[] = "mxqd reaper"; -__attribute__((noreturn)) static void die(char *msg) { - perror(msg); +__attribute__ ((noreturn)) +__attribute__ ((format (printf, 1, 2))) +static void die(const char *restrict fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); _exit(1); } @@ -36,18 +42,18 @@ int main(int argc, char **argv) { struct timeval endtime; if (prctl(PR_SET_NAME, REAPER_PNAME, NULL, NULL, NULL) == -1) - die("PR_SET_NAME"); + die("PR_SET_NAME: %m\n"); user_pid = fork(); if (user_pid == 0) { if (setreuid(uid, uid) == -1) - die("setreuid"); + die("setreuid: %m\n"); execvp(user_argv[0], user_argv); - die(user_argv[0]); + die("%s: %m\n", user_argv[0]); } if (user_pid == -1) - die("fork"); + die("fork: %m\n"); if (gettimeofday(&starttime, NULL) == -1) - die("gettimeofday"); + die("gettimeofday: %m\n"); while (1) { int status; pid_t pid = wait(&status); @@ -57,48 +63,64 @@ int main(int argc, char **argv) { user_status = status; } if (gettimeofday(&endtime, NULL) == -1) - die("gettimeofday"); + die("gettimeofday: %m\n"); timersub(&endtime, &starttime, &user_time); if (getrusage(RUSAGE_CHILDREN, &user_rusage) == -1) - die("getrusage"); + die("getrusage: %m\n"); if (user_time.tv_sec<30) { int wait=30-user_time.tv_sec; sleep(wait); } - char *tmpfilename; - if (asprintf(&tmpfilename, "%s.tmp", spoolfilename) == -1) - die(""); - - FILE *out = fopen(tmpfilename,"w"); - if (out == NULL) - die(tmpfilename); - fprintf(out,"1 %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", - getpid(), - user_status, - user_time.tv_sec, user_time.tv_usec, - user_rusage.ru_utime.tv_sec, user_rusage.ru_utime.tv_usec, - user_rusage.ru_stime.tv_sec, user_rusage.ru_stime.tv_usec, - user_rusage.ru_maxrss, - user_rusage.ru_ixrss, - user_rusage.ru_idrss, - user_rusage.ru_isrss, - user_rusage.ru_minflt, - user_rusage.ru_majflt, - user_rusage.ru_nswap, - user_rusage.ru_inblock, - user_rusage.ru_oublock, - user_rusage.ru_msgsnd, - user_rusage.ru_msgrcv, - user_rusage.ru_nsignals, - user_rusage.ru_nvcsw, - user_rusage.ru_nivcsw - ); - fflush(out); - fsync(fileno(out)); - fclose(out); - if (rename(tmpfilename, spoolfilename) == -1) - die(spoolfilename); + while (1) { + // if anything fails, do not write to stderr, as this is the users + // stderr, just wait and retry. + + char *tmpfilename; + if (asprintf(&tmpfilename, "%s.tmp", spoolfilename) == -1) + goto retry_1; + + FILE *out = fopen(tmpfilename, "w"); + if (out == NULL) + goto retry_2; + if (fprintf(out,"1 %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", + getpid(), + user_status, + user_time.tv_sec, user_time.tv_usec, + user_rusage.ru_utime.tv_sec, user_rusage.ru_utime.tv_usec, + user_rusage.ru_stime.tv_sec, user_rusage.ru_stime.tv_usec, + user_rusage.ru_maxrss, + user_rusage.ru_ixrss, + user_rusage.ru_idrss, + user_rusage.ru_isrss, + user_rusage.ru_minflt, + user_rusage.ru_majflt, + user_rusage.ru_nswap, + user_rusage.ru_inblock, + user_rusage.ru_oublock, + user_rusage.ru_msgsnd, + user_rusage.ru_msgrcv, + user_rusage.ru_nsignals, + user_rusage.ru_nvcsw, + user_rusage.ru_nivcsw) < 0) + goto retry_3; + if (fflush(out) == EOF) + goto retry_3; + if (fsync(fileno(out)) == -1) + goto retry_3; + if (fclose(out) == EOF) + goto retry_2; + if (rename(tmpfilename, spoolfilename) == -1) + goto retry_2; + break; + +retry_3: + fclose(out); +retry_2: + free(tmpfilename); +retry_1: + sleep(10); + } return 0; } diff --git a/mxqadmin.c b/mxqadmin.c index c4746f3c..8b25ba57 100644 --- a/mxqadmin.c +++ b/mxqadmin.c @@ -369,7 +369,8 @@ int main(int argc, char *argv[]) res = mx_mysql_initialize(&mysql); assert(res == 0); - mx_mysql_option_set_default_file(mysql, arg_mysql_default_file); + if (mx_mysql_option_set_default_file(mysql, arg_mysql_default_file) < 0) + mx_die("%s: %s\n", arg_mysql_default_file, mx_mysql_error()); mx_mysql_option_set_default_group(mysql, arg_mysql_default_group); res = mx_mysql_connect_forever(&mysql); diff --git a/mxqd.c b/mxqd.c index 4c65a2a9..1a55545b 100644 --- a/mxqd.c +++ b/mxqd.c @@ -158,9 +158,8 @@ static void cpuset_clear_running(cpu_set_t *running,cpu_set_t *job) { } /**********************************************************************/ -static int setup_cronolog(char *cronolog, char *logdir, char *rellink, char *relformat) +static void setup_cronolog(char *cronolog, char *logdir, char *rellink, char *relformat) { - int res; int pipe_fd[2]; int pid; _mx_cleanup_free_ char *link = NULL; @@ -174,51 +173,34 @@ static int setup_cronolog(char *cronolog, char *logdir, char *rellink, char *rel format = strdup(relformat); } - if (!link || !format) { - mx_log_err("can't allocate filenames: (%m)"); - return 0; - } + if (!link || !format) + mx_die("can't allocate filenames: (%m)"); - res = pipe(pipe_fd); - if (res == -1) { - mx_log_err("can't create pipe for cronolog: (%m)"); - return 0; - } + if (pipe(pipe_fd) == -1) + mx_die("can't create pipe for cronolog: (%m)"); pid = fork(); if (pid < 0) { - mx_log_err("cronolog fork failed: %m"); - return 0; + mx_die("cronolog fork failed: %m"); } else if(pid == 0) { - res = dup2(pipe_fd[0], STDIN_FILENO); - if (res == -1) { - mx_log_err("dup2(fh=%d, %d) for cronolog stdin failed (%m)", pipe_fd[0], STDIN_FILENO); - return 0; - } + if (dup2(pipe_fd[0], STDIN_FILENO) == -1) + mx_die("dup2(fh=%d, %d) for cronolog stdin failed (%m)", pipe_fd[0], STDIN_FILENO); close(pipe_fd[0]); close(pipe_fd[1]); mxq_redirect_output("/dev/null", "/dev/null"); execl(cronolog, cronolog, "--link", link, format, NULL); - mx_log_err("execl('%s', ...) failed (%m)", cronolog); - _exit(EX__MAX + 1); + mx_die("execl('%s', ...) failed (%m)", cronolog); } - res = dup2(pipe_fd[1], STDOUT_FILENO); - if (res == -1) { - mx_log_err("dup2(fh=%d, %d) for cronolog stdout failed (%m)", pipe_fd[0], STDOUT_FILENO); - return 0; - } - res = dup2(STDOUT_FILENO, STDERR_FILENO); - if (res == -1) { - mx_log_err("dup2(fh=%d, %d) for cronolog stderr failed (%m)", STDOUT_FILENO, STDERR_FILENO); - return 0; - } + if (dup2(pipe_fd[1], STDOUT_FILENO) == -1) + mx_die("dup2(fh=%d, %d) for cronolog stdout failed (%m)", pipe_fd[0], STDOUT_FILENO); + if (dup2(STDOUT_FILENO, STDERR_FILENO) == -1) + mx_die("dup2(fh=%d, %d) for cronolog stderr failed (%m)", STDOUT_FILENO, STDERR_FILENO); + close(pipe_fd[0]); close(pipe_fd[1]); - - return pid; } @@ -723,17 +705,14 @@ static int server_init(struct mxq_server *server, int argc, char *argv[]) mx_log_err("MAIN: can't write to '%s': %m", arg_logdir); return -EX_IOERR; } - res = setup_cronolog("/usr/sbin/cronolog", arg_logdir, "mxqd_log", "%Y/mxqd_log-%Y-%m"); - if (!res) { - mx_log_err("MAIN: cronolog setup failed. exiting."); - return -EX_IOERR; - } + setup_cronolog("/usr/sbin/cronolog", arg_logdir, "mxqd_log", "%Y/mxqd_log-%Y-%m"); } res = mx_mysql_initialize(&(server->mysql)); assert(res == 0); - mx_mysql_option_set_default_file(server->mysql, arg_mysql_default_file); + if (mx_mysql_option_set_default_file(server->mysql, arg_mysql_default_file) < 0) + mx_die("%s: %s\n", arg_mysql_default_file, mx_mysql_error()); mx_mysql_option_set_default_group(server->mysql, arg_mysql_default_group); mx_mysql_option_set_reconnect(server->mysql, 1); @@ -1833,7 +1812,7 @@ static void kill_cancelled_jobs(struct ppidcache *ppidcache, struct mxq_server * " AND server_id = ?" ); if (!stmt) { - mx_log_err("mx_mysql_stmt_prepare: %s\n", mx_mysql_error()); + mx_log_err("mx_mysql_stmt_prepare: %s", mx_mysql_error()); return; } @@ -1843,7 +1822,7 @@ static void kill_cancelled_jobs(struct ppidcache *ppidcache, struct mxq_server * unsigned long long num_rows; int res = mx_mysql_statement_execute(stmt, &num_rows); if (res < 0) { - mx_log_err("mx_mysql_statement_execute: %s\n", mx_mysql_error()); + mx_log_err("mx_mysql_statement_execute: %s", mx_mysql_error()); return; } if (num_rows == 0) @@ -1855,7 +1834,7 @@ static void kill_cancelled_jobs(struct ppidcache *ppidcache, struct mxq_server * for (unsigned long i = 0 ; i < num_rows ; i++) { res = mx_mysql_statement_fetch(stmt); if (res < 0) { - mx_log_err("mx_mysql_statement_fetch: %s\n", mx_mysql_error()); + mx_log_err("mx_mysql_statement_fetch: %s", mx_mysql_error()); return; } mx_log_debug("kill running job id %lu", job_id); diff --git a/mxqdump.c b/mxqdump.c index 0e69b066..5cb51b1d 100644 --- a/mxqdump.c +++ b/mxqdump.c @@ -730,7 +730,8 @@ int main(int argc, char *argv[]) res = mx_mysql_initialize(&mysql); assert(res == 0); - mx_mysql_option_set_default_file(mysql, arg_mysql_default_file); + if (mx_mysql_option_set_default_file(mysql, arg_mysql_default_file) < 0) + mx_die("%s: %s\n", arg_mysql_default_file, mx_mysql_error()); mx_mysql_option_set_default_group(mysql, arg_mysql_default_group); res = mx_mysql_connect_forever(&mysql); diff --git a/mxqkill.c b/mxqkill.c index b790af81..05890a38 100644 --- a/mxqkill.c +++ b/mxqkill.c @@ -33,16 +33,6 @@ #define UINT64_SPECIAL_MIN (uint64_t)(-2) #define UINT64_HASVALUE(x) ((x) < UINT64_SPECIAL_MIN) -__attribute__ ((noreturn)) -__attribute__ ((format (printf, 1, 2))) -static void die(const char *restrict fmt, ...) { - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - va_end(ap); - _exit(1); -} - static void print_usage(void) { mxq_print_generic_version(); @@ -51,6 +41,7 @@ static void print_usage(void) "Usage:\n" " %s [options]\n" " %s [options] --group-id=GROUPID\n" + " %s [options] --job-id=JOBID\n" "\n" "Synopsis:\n" " kill/cancel jobs running in MXQ cluster\n" @@ -59,7 +50,6 @@ static void print_usage(void) "\n" " -g, --group-id=GROUPID cancel/kill group \n" " -J, --job-id=JOBID cancel job \n" - " -u, --user=NAME|UID cancel group for user. (root only)\n" "\n" " -v, --verbose be more verbose\n" " --debug set debug log level (default: warning log level)\n" @@ -72,10 +62,15 @@ static void print_usage(void) " -M, --mysql-default-file[=MYSQLCNF] (default: %s)\n" " -S, --mysql-default-group[=MYSQLGROUP] (default: %s)\n" "\n" + "Privileged:\n" + "\n" + " -u, --user=NAME|UID assume given username\n" + "\n" "Environment:\n" " MXQ_MYSQL_DEFAULT_FILE change default for MYSQLCNF\n" " MXQ_MYSQL_DEFAULT_GROUP change default for MYSQLGROUP\n" "\n", + program_invocation_short_name, program_invocation_short_name, program_invocation_short_name, MXQ_MYSQL_DEFAULT_FILE_STR, @@ -161,27 +156,27 @@ static void verify_job_owner(struct mx_mysql *mysql, uint64_t job_id, uint64_t u " AND job_id = ?" ); if (!stmt) - die("mx_mysql_statement_prepare(): %s\n", mx_mysql_error()); + mx_die("mx_mysql_statement_prepare(): %s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, uint64, &job_id); unsigned long long num_rows; int res = mx_mysql_statement_execute(stmt, &num_rows); if (res < 0) - die("mx_mysql_statement_execute(): %s\n", mx_mysql_error()); + mx_die("mx_mysql_statement_execute(): %s\n", mx_mysql_error()); if (num_rows == 0) - die("no such job_id %lu\n", job_id); + mx_die("no such job_id %lu\n", job_id); uint64_t uid; mx_mysql_statement_result_bind(stmt, 0, uint64, &uid); res = mx_mysql_statement_fetch(stmt); if (res < 0) - die("mx_mysql_statement_fetch: %s\n", mx_mysql_error()); + mx_die("mx_mysql_statement_fetch: %s\n", mx_mysql_error()); if (uid != user_uid) - die("job %lu: permission denied\n", job_id); + mx_die("job %lu: permission denied\n", job_id); mx_mysql_statement_close(&stmt); } @@ -192,16 +187,16 @@ static void set_job_cancelled(struct mx_mysql *mysql, uint64_t job_id) { " WHERE job_id = ?" ); if (!stmt) - die("mx_mysql_statement_prepare(): %s\n", mx_mysql_error()); + mx_die("mx_mysql_statement_prepare(): %s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, uint64, &job_id); unsigned long long num_rows; int res = mx_mysql_statement_execute(stmt, &num_rows); if (res < 0) - die("mx_mysql_statement_execute(): %s\n", mx_mysql_error()); + mx_die("mx_mysql_statement_execute(): %s\n", mx_mysql_error()); if (num_rows == 0) - die("no such job_id %lu\n", job_id); + mx_die("no such job_id %lu\n", job_id); mx_mysql_statement_close(&stmt); } @@ -384,7 +379,8 @@ int main(int argc, char *argv[]) res = mx_mysql_initialize(&mysql); assert(res == 0); - mx_mysql_option_set_default_file(mysql, arg_mysql_default_file); + if (mx_mysql_option_set_default_file(mysql, arg_mysql_default_file) < 0) + mx_die("%s: %s\n", arg_mysql_default_file, mx_mysql_error()); mx_mysql_option_set_default_group(mysql, arg_mysql_default_group); res = mx_mysql_connect_forever(&mysql); diff --git a/mxqset.c b/mxqset.c index bf205385..3d4a24d1 100644 --- a/mxqset.c +++ b/mxqset.c @@ -11,16 +11,6 @@ #include "keywordset.h" #include "stdarg.h" -static __attribute__ ((noreturn)) void die(char *msg, ...) { - va_list ap; - - va_start(ap, msg); - fprintf(stderr, "%s: ",program_invocation_short_name); - vfprintf(stderr, msg, ap); - va_end(ap); - exit(1); -} - static void verify_group_permission(struct mx_mysql *mysql , long unsigned groupid, uid_t client_uid) { struct mx_mysql_stmt *stmt = NULL; unsigned long long num_rows; @@ -28,17 +18,17 @@ static void verify_group_permission(struct mx_mysql *mysql , long unsigned group stmt = mx_mysql_statement_prepare(mysql,"SELECT user_uid FROM mxq_group WHERE group_id = ? LIMIT 1"); if (!stmt) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, uint64, &(groupid)); if (mx_mysql_statement_execute(stmt, &num_rows) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); if (num_rows < 1) - die("no such group %ld\n", groupid); + mx_die("no such group %ld\n", groupid); mx_mysql_statement_result_bind(stmt, 0, uint32, &uid); if (mx_mysql_statement_fetch(stmt) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); if ( client_uid != 0 && client_uid != uid ) - die("no permission to access this group\n"); + mx_die("no permission to access this group\n"); mx_mysql_statement_close(&stmt); } @@ -59,14 +49,14 @@ static void update_closed_flag(struct mx_mysql *mysql, long unsigned groupid, en stmt = mx_mysql_statement_prepare(mysql, "UPDATE mxq_group SET group_flags = ( group_flags & ?) | ? WHERE group_id = ?"); if (!stmt) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, uint64, &(mask)); mx_mysql_statement_param_bind(stmt, 1, uint64, &(value)); mx_mysql_statement_param_bind(stmt, 2, uint64, &(groupid)); if (mx_mysql_statement_execute(stmt, &num_rows) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); if (num_rows < 1) - die("no such group %ld\n", groupid); + mx_die("no such group %ld\n", groupid); mx_mysql_statement_close(&stmt); } @@ -81,15 +71,15 @@ static void update_blacklist(struct mx_mysql *mysql, long unsigned groupid, char stmt = mx_mysql_statement_prepare(mysql, "SELECT group_blacklist FROM mxq_group WHERE group_id = ? LIMIT 1"); if (!stmt) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, uint64, &groupid); if (mx_mysql_statement_execute(stmt, &num_rows) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); if (num_rows < 1) - die("no such group %ld\n", groupid); + mx_die("no such group %ld\n", groupid); mx_mysql_statement_result_bind(stmt, 0, string, &group_blacklist); if (mx_mysql_statement_fetch(stmt) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_close(&stmt); blacklist = keywordset_new(group_blacklist); free(group_blacklist); @@ -103,13 +93,13 @@ static void update_blacklist(struct mx_mysql *mysql, long unsigned groupid, char stmt = mx_mysql_statement_prepare(mysql, "UPDATE mxq_group SET group_blacklist = ? WHERE group_id = ?"); if (!stmt) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, string, &group_blacklist); mx_mysql_statement_param_bind(stmt, 1, uint64, &groupid); if (mx_mysql_statement_execute(stmt, &num_rows) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); if (num_rows < 1) - die("no such group %ld\n", groupid); + mx_die("no such group %ld\n", groupid); mx_mysql_statement_close(&stmt); free(group_blacklist); keywordset_free(blacklist); @@ -126,15 +116,15 @@ static void update_whitelist(struct mx_mysql *mysql, long unsigned groupid, char stmt = mx_mysql_statement_prepare(mysql, "SELECT group_whitelist FROM mxq_group WHERE group_id = ? LIMIT 1"); if (!stmt) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, uint64, &groupid); if (mx_mysql_statement_execute(stmt, &num_rows) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); if (num_rows < 1) - die("no such group %ld\n", groupid); + mx_die("no such group %ld\n", groupid); mx_mysql_statement_result_bind(stmt, 0, string, &group_whitelist); if (mx_mysql_statement_fetch(stmt) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_close(&stmt); whitelist = keywordset_new(group_whitelist); free(group_whitelist); @@ -148,13 +138,13 @@ static void update_whitelist(struct mx_mysql *mysql, long unsigned groupid, char stmt = mx_mysql_statement_prepare(mysql, "UPDATE mxq_group SET group_whitelist = ? WHERE group_id = ?"); if (!stmt) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); mx_mysql_statement_param_bind(stmt, 0, string, &group_whitelist); mx_mysql_statement_param_bind(stmt, 1, uint64, &groupid); if (mx_mysql_statement_execute(stmt, &num_rows) < 0) - die("%s\n", mx_mysql_error()); + mx_die("%s\n", mx_mysql_error()); if (num_rows < 1) - die("no such group %ld\n", groupid); + mx_die("no such group %ld\n", groupid); mx_mysql_statement_close(&stmt); free(group_whitelist); keywordset_free(whitelist); @@ -226,7 +216,8 @@ int main(int argc, char **argv) { exit_usage(); assert(mx_mysql_initialize(&mysql) == 0); - mx_mysql_option_set_default_file(mysql, MXQ_MYSQL_DEFAULT_FILE); + if (mx_mysql_option_set_default_file(mysql, MXQ_MYSQL_DEFAULT_FILE) < 0) + mx_die("%s: %s\n", MXQ_MYSQL_DEFAULT_FILE, mx_mysql_error()); mx_mysql_option_set_default_group(mysql, MXQ_MYSQL_DEFAULT_GROUP); assert(mx_mysql_connect_forever(&mysql) == 0); diff --git a/mxqsub.c b/mxqsub.c index 26ae3b75..5ee8cee3 100644 --- a/mxqsub.c +++ b/mxqsub.c @@ -503,7 +503,6 @@ static int add_job(struct mx_mysql *mysql, struct mxq_job *j) assert(j->job_argv_str); assert(*j->job_argv_str); assert(j->job_stdout); assert(*j->job_stdout); assert(j->job_stderr); assert(*j->job_stderr); - assert(j->job_umask); assert(j->host_submit); assert(*j->host_submit); stmt = mx_mysql_statement_prepare(mysql, @@ -785,7 +784,7 @@ int main(int argc, char *argv[]) arg_program_name = NULL; arg_threads = 1; arg_memory = 2048; - arg_time = 0; + arg_time = 15; arg_max_per_node = 0; arg_workdir = current_workdir; arg_stdout = "/dev/null"; @@ -1042,11 +1041,6 @@ int main(int argc, char *argv[]) arg_program_name = argv[0]; } - if (!arg_time) { - arg_time = 15; - mx_log_warning("option '--runtime' or '-t' not used. Your job will get killed if it runs longer than the default of %d minutes.", arg_time); - } - if (arg_time > 60*24) { mx_log_warning("option '--runtime' specifies a runtime longer than 24h. Your job may get killed. Be sure to implement some check pointing."); } @@ -1166,7 +1160,8 @@ int main(int argc, char *argv[]) res = mx_mysql_initialize(&mysql); assert(res == 0); - mx_mysql_option_set_default_file(mysql, arg_mysql_default_file); + if (mx_mysql_option_set_default_file(mysql, arg_mysql_default_file) < 0) + mx_die("%s: %s\n", arg_mysql_default_file, mx_mysql_error()); mx_mysql_option_set_default_group(mysql, arg_mysql_default_group); res = mx_mysql_connect_forever(&mysql); diff --git a/mysql/create_tables.sql b/mysql/create_tables.sql index 3b2bcd06..41585f76 100644 --- a/mysql/create_tables.sql +++ b/mysql/create_tables.sql @@ -2,6 +2,7 @@ CREATE TABLE IF NOT EXISTS mxq_group ( group_id INT8 UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, group_name VARCHAR(128) NOT NULL DEFAULT 'default', group_status INT1 UNSIGNED NOT NULL DEFAULT 0, + group_flags INT8 UNSIGNED NOT NULL DEFAULT 0, group_priority INT2 UNSIGNED NOT NULL DEFAULT 127, group_blacklist VARCHAR(1000) NOT NULL DEFAULT '', group_whitelist VARCHAR(1000) NOT NULL DEFAULT '', @@ -62,8 +63,10 @@ CREATE TABLE IF NOT EXISTS mxq_group ( dependency_of_group INT8 UNSIGNED NULL DEFAULT NULL, INDEX(group_id), - INDEX(group_name) -); + INDEX(group_name), + INDEX(group_jobs_inq), + INDEX(group_jobs_running) +) DEFAULT CHARSET=latin1; CREATE TABLE IF NOT EXISTS mxq_job ( job_id INT8 UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, @@ -127,7 +130,7 @@ CREATE TABLE IF NOT EXISTS mxq_job ( INDEX (host_id(128)), INDEX (host_hostname(64)), INDEX (server_id(64)) -); +) DEFAULT CHARSET=latin1; CREATE TABLE IF NOT EXISTS mxq_daemon ( daemon_id INT4 UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, @@ -169,4 +172,4 @@ CREATE TABLE IF NOT EXISTS mxq_daemon ( INDEX (daemon_name(64)), INDEX (hostname(64)) -); +) DEFAULT CHARSET=latin1; diff --git a/mysql/migrate_018_add_group_indexes.sql b/mysql/migrate_018_add_group_indexes.sql new file mode 100644 index 00000000..4615a3ad --- /dev/null +++ b/mysql/migrate_018_add_group_indexes.sql @@ -0,0 +1,2 @@ +CREATE INDEX group_jobs_inq ON mxq_group(group_jobs_inq); +CREATE INDEX group_jobs_running ON mxq_group(group_jobs_running); diff --git a/test_mx_util.c b/test_mx_util.c index 4c447d7e..7baa7165 100644 --- a/test_mx_util.c +++ b/test_mx_util.c @@ -597,11 +597,13 @@ static void test_mx_call_external(void) { int sts; errno = 999; - sts = mx_call_external("/usr/bin/true", NULL); + char *argv[] = { "", NULL }; + + sts = mx_call_external("/usr/bin/true", argv); assert(sts == 0); assert(errno == 999); - sts = mx_call_external("/usr/bin/false", NULL); + sts = mx_call_external("/usr/bin/false", argv); assert(sts == -1); assert(errno == EPROTO); }