From bba1452a9528161cc4c46d6b17f477b67da9377e Mon Sep 17 00:00:00 2001 From: Marius Tolzmann Date: Mon, 19 Oct 2015 15:56:54 +0200 Subject: [PATCH 1/8] mxqd: set MXQ_HOSTID in main process This enables external programs to read it from the process environment --- mxqd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mxqd.c b/mxqd.c index 48b5e9be..d4e53723 100644 --- a/mxqd.c +++ b/mxqd.c @@ -507,6 +507,7 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) mx_proc_pid_stat_free(&pps); mx_asprintf_forever(&server->host_id, "%s-%llx-%x", server->boot_id, server->starttime, getpid()); + mx_setenv_forever("MXQ_HOSTID", server->host_id); server->slots = threads_total; res = cpuset_init(server); From 295c8df2c1bcae3f3afd60a45ec287eb6d644212 Mon Sep 17 00:00:00 2001 From: Marius Tolzmann Date: Thu, 22 Oct 2015 21:26:51 +0200 Subject: [PATCH 2/8] mx_proc: Move process related function from mx_util to mx_proc --- .gitignore | 1 + Makefile | 13 ++++++ mx_proc.c | 111 +++++++++++++++++++++++++++++++++++++++++++++++++ mx_proc.h | 58 ++++++++++++++++++++++++++ mx_util.c | 98 ------------------------------------------- mx_util.h | 51 ----------------------- mxqd.c | 8 ++-- test_mx_util.c | 24 ++++------- 8 files changed, 196 insertions(+), 168 deletions(-) create mode 100644 mx_proc.c create mode 100644 mx_proc.h diff --git a/.gitignore b/.gitignore index d6af43fc..f466977c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ mx_getopt.o mx_flock.o mx_log.o mx_util.o +mx_proc.o mxq_group.o mxqadmin.o mxqdump.o diff --git a/Makefile b/Makefile index dad0d7d2..ca31600f 100644 --- a/Makefile +++ b/Makefile @@ -242,6 +242,10 @@ mx_log.h += mx_log.h mx_util.h += mx_util.h +### mx_proc.h ---------------------------------------------------------- + +mx_proc.h += mx_proc.h + ### mx_flock.h --------------------------------------------------------- mx_flock.h += mx_flock.h @@ -292,6 +296,12 @@ mx_util.o: $(mx_log.h) clean: CLEAN += mx_util.o +### mx_proc 
------------------------------------------------------------ + +mx_proc.o: $(mx_proc.h) + +clean: CLEAN += mx_proc.o + ### mx_flock.o --------------------------------------------------------- mx_flock.o: $(mx_flock.h) @@ -373,6 +383,7 @@ clean: CLEAN += mxq_job.o mxqd.o: $(mx_getopt.h) mxqd.o: $(mx_flock.h) mxqd.o: $(mx_util.h) +mxqd.o: $(mx_proc.h) mxqd.o: $(mx_log.h) mxqd.o: $(mxqd.h) mxqd.o: $(mxq_group.h) @@ -405,6 +416,7 @@ clean: CLEAN += mxqsub.o mxqd: mx_flock.o mxqd: mx_util.o +mxqd: mx_proc.o mxqd: mx_log.o mxqd: mxq_log.o mxqd: mx_getopt.o @@ -522,6 +534,7 @@ test_mx_util.o: $(mx_util.h) clean: CLEAN += test_mx_util.o test_mx_util: mx_util.o +test_mx_util: mx_proc.o test_mx_util: mx_log.o clean: CLEAN += test_mx_util diff --git a/mx_proc.c b/mx_proc.c new file mode 100644 index 00000000..2af1a5da --- /dev/null +++ b/mx_proc.c @@ -0,0 +1,111 @@ +#include +#include +#include +#include + +#include "mx_util.h" +#include "mx_proc.h" + +static int _mx_proc_pid_stat_strscan(char *str, struct mx_proc_pid_stat *pps) +{ + size_t res = 0; + char *p; + char *s; + + pps->comm = NULL; + + s = str; + + res += mx_strscan_ll(&s, &(pps->pid)); + + p = strrchr(s, ')'); + if (!p) + return -(errno=EINVAL); + + *p = 0; + s++; + + pps->comm = mx_strdup_forever(s); + s = p + 2; + + pps->state = *s; + res += !(*(s+1) == ' '); + s += 2; + + res += mx_strscan_ll(&s, &(pps->ppid)); + res += mx_strscan_ll(&s, &(pps->pgrp)); + res += mx_strscan_ll(&s, &(pps->session)); + res += mx_strscan_ll(&s, &(pps->tty_nr)); + res += mx_strscan_ll(&s, &(pps->tpgid)); + res += mx_strscan_ull(&s, &(pps->flags)); + res += mx_strscan_ull(&s, &(pps->minflt)); + res += mx_strscan_ull(&s, &(pps->cminflt)); + res += mx_strscan_ull(&s, &(pps->majflt)); + res += mx_strscan_ull(&s, &(pps->cmajflt)); + res += mx_strscan_ull(&s, &(pps->utime)); + res += mx_strscan_ull(&s, &(pps->stime)); + res += mx_strscan_ll(&s, &(pps->cutime)); + res += mx_strscan_ll(&s, &(pps->cstime)); + res += mx_strscan_ll(&s, 
&(pps->priority)); + res += mx_strscan_ll(&s, &(pps->nice)); + res += mx_strscan_ll(&s, &(pps->num_threads)); + res += mx_strscan_ll(&s, &(pps->itrealvalue)); + res += mx_strscan_ull(&s, &(pps->starttime)); + res += mx_strscan_ull(&s, &(pps->vsize)); + res += mx_strscan_ll(&s, &(pps->rss)); + res += mx_strscan_ull(&s, &(pps->rsslim)); + res += mx_strscan_ull(&s, &(pps->startcode)); + res += mx_strscan_ull(&s, &(pps->endcode)); + res += mx_strscan_ull(&s, &(pps->startstack)); + res += mx_strscan_ull(&s, &(pps->kstkesp)); + res += mx_strscan_ull(&s, &(pps->kstkeip)); + res += mx_strscan_ull(&s, &(pps->signal)); + res += mx_strscan_ull(&s, &(pps->blocked)); + res += mx_strscan_ull(&s, &(pps->sigignore)); + res += mx_strscan_ull(&s, &(pps->sigcatch)); + res += mx_strscan_ull(&s, &(pps->wchan)); + res += mx_strscan_ull(&s, &(pps->nswap)); + res += mx_strscan_ull(&s, &(pps->cnswap)); + res += mx_strscan_ll(&s, &(pps->exit_signal)); + res += mx_strscan_ll(&s, &(pps->processor)); + res += mx_strscan_ull(&s, &(pps->rt_priority)); + res += mx_strscan_ull(&s, &(pps->policy)); + res += mx_strscan_ull(&s, &(pps->delayacct_blkio_ticks)); + res += mx_strscan_ull(&s, &(pps->guest_time)); + res += mx_strscan_ll(&s, &(pps->cguest_time)); + + if (res != 0) + return -(errno=EINVAL); + + return 0; +} + +int mx_proc_pid_stat(struct mx_proc_pid_stat **pps, pid_t pid) +{ + _mx_cleanup_free_ char *fname = NULL; + _mx_cleanup_free_ char *line = NULL; + int res; + + mx_asprintf_forever(&fname, "/proc/%d/stat", pid); + + if (!*pps) + *pps = mx_calloc_forever(1, sizeof(**pps)); + + res = mx_read_first_line_from_file(fname, &line); + if (res < 0) + return res; + + res = _mx_proc_pid_stat_strscan(line, *pps); + if (res < 0) + return res; + + return 0; +} + +void mx_proc_pid_stat_free_content(struct mx_proc_pid_stat *pps) +{ + if (!pps) + return; + + mx_free_null(pps->comm); +} diff --git a/mx_proc.h b/mx_proc.h new file mode 100644 index 00000000..4a0efb8e --- /dev/null +++ b/mx_proc.h @@ -0,0 
+1,58 @@ +#ifndef __MX_PROC_H__ +#define __MX_PROC_H__ 1 + +#include + +struct mx_proc_pid_stat { + long long int pid; /* 1 */ + char *comm; /* 2 (comm) */ + char state; /* 3 "RSDZTW" */ + long long int ppid; /* 4 */ + long long int pgrp; /* 5 */ + long long int session; /* 6 */ + long long int tty_nr; /* 7 */ + long long int tpgid; /* 8 */ + unsigned long long int flags; /* 9 */ + unsigned long long int minflt; /* 10 */ + unsigned long long int cminflt; /* 11 */ + unsigned long long int majflt; /* 12 */ + unsigned long long int cmajflt; /* 13 */ + unsigned long long int utime; /* 14 */ + unsigned long long int stime; /* 15 */ + long long int cutime; /* 16 */ + long long int cstime; /* 17 */ + long long int priority; /* 18 */ + long long int nice; /* 19 */ + long long int num_threads; /* 20 */ + long long int itrealvalue; /* 21 */ + unsigned long long int starttime; /* 22 */ + unsigned long long int vsize; /* 23 */ + long long int rss; /* 24 */ + unsigned long long int rsslim; /* 25 */ + unsigned long long int startcode; /* 26 */ + unsigned long long int endcode; /* 27 */ + unsigned long long int startstack; /* 28 */ + unsigned long long int kstkesp; /* 29 */ + unsigned long long int kstkeip; /* 30 */ + unsigned long long int signal; /* 31 */ + unsigned long long int blocked; /* 32 */ + unsigned long long int sigignore; /* 33 */ + unsigned long long int sigcatch; /* 34 */ + unsigned long long int wchan; /* 35 */ + unsigned long long int nswap; /* 36 */ + unsigned long long int cnswap; /* 37 */ + long long int exit_signal; /* 38 */ + long long int processor; /* 39 */ + unsigned long long int rt_priority; /* 40 */ + unsigned long long int policy; /* 41 */ + unsigned long long int delayacct_blkio_ticks; /* 42 */ + unsigned long long int guest_time; /* 43 */ + long long int cguest_time; /* 44 */ +}; + +int mx_proc_pid_stat_read(struct mx_proc_pid_stat *pps, char *fmt, ...); +int mx_proc_pid_stat(struct mx_proc_pid_stat **pps, pid_t pid); + +void 
mx_proc_pid_stat_free_content(struct mx_proc_pid_stat *pps); + +#endif diff --git a/mx_util.c b/mx_util.c index 24088b2c..f783ed38 100644 --- a/mx_util.c +++ b/mx_util.c @@ -856,104 +856,6 @@ int mx_strscan_ll(char **str, long long int *to) return res; } -int mx_strscan_proc_pid_stat(char *str, struct proc_pid_stat *pps) -{ - size_t res = 0; - char *p; - char *s; - - pps->comm = NULL; - - s = str; - - res += mx_strscan_ll(&s, &(pps->pid)); - - p = strrchr(s, ')'); - if (!p) - return -(errno=EINVAL); - - *p = 0; - s++; - - pps->comm = mx_strdup_forever(s); - s = p + 2; - - pps->state = *s; - res += !(*(s+1) == ' '); - s += 2; - - res += mx_strscan_ll(&s, &(pps->ppid)); - res += mx_strscan_ll(&s, &(pps->pgrp)); - res += mx_strscan_ll(&s, &(pps->session)); - res += mx_strscan_ll(&s, &(pps->tty_nr)); - res += mx_strscan_ll(&s, &(pps->tpgid)); - res += mx_strscan_ull(&s, &(pps->flags)); - res += mx_strscan_ull(&s, &(pps->minflt)); - res += mx_strscan_ull(&s, &(pps->cminflt)); - res += mx_strscan_ull(&s, &(pps->majflt)); - res += mx_strscan_ull(&s, &(pps->cmajflt)); - res += mx_strscan_ull(&s, &(pps->utime)); - res += mx_strscan_ull(&s, &(pps->stime)); - res += mx_strscan_ll(&s, &(pps->cutime)); - res += mx_strscan_ll(&s, &(pps->cstime)); - res += mx_strscan_ll(&s, &(pps->priority)); - res += mx_strscan_ll(&s, &(pps->nice)); - res += mx_strscan_ll(&s, &(pps->num_threads)); - res += mx_strscan_ll(&s, &(pps->itrealvalue)); - res += mx_strscan_ull(&s, &(pps->starttime)); - res += mx_strscan_ull(&s, &(pps->vsize)); - res += mx_strscan_ll(&s, &(pps->rss)); - res += mx_strscan_ull(&s, &(pps->rsslim)); - res += mx_strscan_ull(&s, &(pps->startcode)); - res += mx_strscan_ull(&s, &(pps->endcode)); - res += mx_strscan_ull(&s, &(pps->startstack)); - res += mx_strscan_ull(&s, &(pps->kstkesp)); - res += mx_strscan_ull(&s, &(pps->kstkeip)); - res += mx_strscan_ull(&s, &(pps->signal)); - res += mx_strscan_ull(&s, &(pps->blocked)); - res += mx_strscan_ull(&s, &(pps->sigignore)); - res += 
mx_strscan_ull(&s, &(pps->sigcatch)); - res += mx_strscan_ull(&s, &(pps->wchan)); - res += mx_strscan_ull(&s, &(pps->nswap)); - res += mx_strscan_ull(&s, &(pps->cnswap)); - res += mx_strscan_ll(&s, &(pps->exit_signal)); - res += mx_strscan_ll(&s, &(pps->processor)); - res += mx_strscan_ull(&s, &(pps->rt_priority)); - res += mx_strscan_ull(&s, &(pps->policy)); - res += mx_strscan_ull(&s, &(pps->delayacct_blkio_ticks)); - res += mx_strscan_ull(&s, &(pps->guest_time)); - res += mx_strscan_ll(&s, &(pps->cguest_time)); - - if (res != 0) - return -(errno=EINVAL); - - return 0; -} - -int mx_proc_pid_stat(struct proc_pid_stat *pps, pid_t pid) -{ - _mx_cleanup_free_ char *fname = NULL; - _mx_cleanup_free_ char *line = NULL; - int res; - - mx_asprintf_forever(&fname, "/proc/%d/stat", pid); - - res = mx_read_first_line_from_file(fname, &line); - if (res < 0) - return res; - - res = mx_strscan_proc_pid_stat(line, pps); - if (res < 0) - return res; - - return 0; -} - -void mx_proc_pid_stat_free(struct proc_pid_stat *pps) -{ - mx_free_null(pps->comm); -} - int mx_sleep(unsigned int seconds) { if (seconds) diff --git a/mx_util.h b/mx_util.h index aeb24894..16a27d37 100644 --- a/mx_util.h +++ b/mx_util.h @@ -10,53 +10,6 @@ #include "mx_log.h" -struct proc_pid_stat { - long long int pid; /* 1 */ - char *comm; /* 2 (comm) */ - char state; /* 3 "RSDZTW" */ - long long int ppid; /* 4 */ - long long int pgrp; /* 5 */ - long long int session; /* 6 */ - long long int tty_nr; /* 7 */ - long long int tpgid; /* 8 */ - unsigned long long int flags; /* 9 */ - unsigned long long int minflt; /* 10 */ - unsigned long long int cminflt; /* 11 */ - unsigned long long int majflt; /* 12 */ - unsigned long long int cmajflt; /* 13 */ - unsigned long long int utime; /* 14 */ - unsigned long long int stime; /* 15 */ - long long int cutime; /* 16 */ - long long int cstime; /* 17 */ - long long int priority; /* 18 */ - long long int nice; /* 19 */ - long long int num_threads; /* 20 */ - long long int 
itrealvalue; /* 21 */ - unsigned long long int starttime; /* 22 */ - unsigned long long int vsize; /* 23 */ - long long int rss; /* 24 */ - unsigned long long int rsslim; /* 25 */ - unsigned long long int startcode; /* 26 */ - unsigned long long int endcode; /* 27 */ - unsigned long long int startstack; /* 28 */ - unsigned long long int kstkesp; /* 29 */ - unsigned long long int kstkeip; /* 30 */ - unsigned long long int signal; /* 31 */ - unsigned long long int blocked; /* 32 */ - unsigned long long int sigignore; /* 33 */ - unsigned long long int sigcatch; /* 34 */ - unsigned long long int wchan; /* 35 */ - unsigned long long int nswap; /* 36 */ - unsigned long long int cnswap; /* 37 */ - long long int exit_signal; /* 38 */ - long long int processor; /* 39 */ - unsigned long long int rt_priority; /* 40 */ - unsigned long long int policy; /* 41 */ - unsigned long long int delayacct_blkio_ticks; /* 42 */ - unsigned long long int guest_time; /* 43 */ - long long int cguest_time; /* 44 */ -}; - #ifdef MX_NDEBUG # include # define mx_assert_return_minus_errno(test, eno) \ @@ -168,10 +121,6 @@ int mx_read_first_line_from_file(char *fname, char **line); int mx_strscan_ull(char **str, unsigned long long int *to); int mx_strscan_ll(char **str, long long int *to); -int mx_strscan_proc_pid_stat(char *str, struct proc_pid_stat *pps); - -int mx_proc_pid_stat(struct proc_pid_stat *pps, pid_t pid); -void mx_proc_pid_stat_free(struct proc_pid_stat *pps); int mx_sleep(unsigned int seconds); int mx_sleep_nofail(unsigned int seconds); diff --git a/mxqd.c b/mxqd.c index d4e53723..77706c73 100644 --- a/mxqd.c +++ b/mxqd.c @@ -34,6 +34,7 @@ #include "mxq_group.h" #include "mxq_job.h" #include "mx_mysql.h" +#include "mx_proc.h" #include "mxqd.h" #include "mxq.h" @@ -281,7 +282,8 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) unsigned long memory_total = 2048; unsigned long memory_max = 0; int i; - struct proc_pid_stat pps = {0}; + + _mx_cleanup_free_ struct 
mx_proc_pid_stat *pps = NULL; struct mx_getopt_ctl optctl; struct mx_option opts[] = { @@ -503,8 +505,8 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) res = mx_proc_pid_stat(&pps, getpid()); assert(res == 0); - server->starttime = pps.starttime; - mx_proc_pid_stat_free(&pps); + server->starttime = pps->starttime; + mx_proc_pid_stat_free_content(pps); mx_asprintf_forever(&server->host_id, "%s-%llx-%x", server->boot_id, server->starttime, getpid()); mx_setenv_forever("MXQ_HOSTID", server->host_id); diff --git a/test_mx_util.c b/test_mx_util.c index b290490a..817fbef5 100644 --- a/test_mx_util.c +++ b/test_mx_util.c @@ -8,6 +8,7 @@ #include #include "mx_util.h" +#include "mx_proc.h" static void test_mx_strskipwhitespaces(void) { @@ -305,8 +306,7 @@ static void test_mx_strscan(void) unsigned long long int ull; long long int ll; _mx_cleanup_free_ char *line = NULL; - struct proc_pid_stat pps = {0}; - struct proc_pid_stat pps2 = {0}; + _mx_cleanup_free_ struct mx_proc_pid_stat *pps = NULL; assert(s = strdup("123 456 -789 246 abc")); str = s; @@ -340,20 +340,12 @@ static void test_mx_strscan(void) assert(mx_streq(str, "")); assert(mx_streq(s, "123")); - assert(mx_read_first_line_from_file("/proc/self/stat", &line) > 0); - assert(mx_strscan_proc_pid_stat(line, &pps) == 0); - assert(pps.pid == getpid()); - assert(pps.ppid == getppid()); - assert(pps.state == 'R'); - assert(mx_streq(pps.comm, program_invocation_short_name) || mx_streq(pps.comm, "memcheck-amd64-")); - mx_proc_pid_stat_free(&pps); - - assert(mx_proc_pid_stat(&pps2, getpid()) == 0); - assert(pps2.pid == getpid()); - assert(pps2.ppid == getppid()); - assert(pps2.state == 'R'); - assert(mx_streq(pps2.comm, program_invocation_short_name) || mx_streq(pps2.comm, "memcheck-amd64-")); - mx_proc_pid_stat_free(&pps2); + assert(mx_proc_pid_stat(&pps, getpid()) == 0); + assert(pps->pid == getpid()); + assert(pps->ppid == getppid()); + assert(pps->state == 'R'); + assert(mx_streq(pps->comm, 
program_invocation_short_name) || mx_streq(pps->comm, "memcheck-amd64-")); + mx_proc_pid_stat_free_content(pps); } static void test_mx_strvec() { From 62ddc9b17a695bfb3ce921f2bd40197a35444d47 Mon Sep 17 00:00:00 2001 From: Marius Tolzmann Date: Sat, 24 Oct 2015 15:06:57 +0200 Subject: [PATCH 3/8] MXQ: update credits --- Makefile | 2 +- mxq.h | 5 +++-- mxqd.c | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index c0f38500..eb923d81 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ MXQ_VERSION_MAJOR = 0 MXQ_VERSION_MINOR = 17 MXQ_VERSION_PATCH = 0 MXQ_VERSION_EXTRA = "beta" -MXQ_VERSIONDATE = 2013-2015 +MXQ_VERSIONDATE = 2015 MXQ_VERSION_GIT := $(shell git describe --long 2>/dev/null) diff --git a/mxq.h b/mxq.h index 556db6f7..54b073f9 100644 --- a/mxq.h +++ b/mxq.h @@ -17,7 +17,7 @@ #endif #ifndef MXQ_VERSIONDATE -# define MXQ_VERSIONDATE "2015" +# define MXQ_VERSIONDATE "today" #endif #ifndef MXQ_MYSQL_DEFAULT_FILE @@ -53,7 +53,8 @@ static void mxq_print_generic_version(void) #ifdef MXQ_DEVELOPMENT "DEVELOPMENT VERSION: Do not use in production environments.\n" #endif - " by Marius Tolzmann " MXQ_VERSIONDATE "\n" + " by Marius Tolzmann 2013-" MXQ_VERSIONDATE "\n" + " and Donald Buczek 2015-" MXQ_VERSIONDATE "\n" " Max Planck Institute for Molecular Genetics - Berlin Dahlem\n", program_invocation_short_name ); diff --git a/mxqd.c b/mxqd.c index 261cb1d0..cc559967 100644 --- a/mxqd.c +++ b/mxqd.c @@ -1830,7 +1830,8 @@ int main(int argc, char *argv[]) } mx_log_info("mxqd - " MXQ_VERSIONFULL); - mx_log_info(" by Marius Tolzmann " MXQ_VERSIONDATE); + mx_log_info(" by Marius Tolzmann 2013-" MXQ_VERSIONDATE); + mx_log_info(" and Donald Buczek 2015-" MXQ_VERSIONDATE); mx_log_info(" Max Planck Institute for Molecular Genetics - Berlin Dahlem"); #ifdef MXQ_DEVELOPMENT mx_log_warning("DEVELOPMENT VERSION: Do not use in production environments.\n"); From bd9deb9c62ca62f89c43e6bd2c9cdb66d9bfdca5 Mon Sep 17 00:00:00 2001 From: Marius Tolzmann 
Date: Sat, 24 Oct 2015 15:32:40 +0200 Subject: [PATCH 4/8] README: Add development instructions --- README.md | 49 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fb37fe79..6dfb2602 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ -# mxq -MXQ - mariux64 job scheduling system +# MXQ - mariux64 job scheduling system +- by Marius Tolzmann 2013-2015 +- and Donald Buczek 2015-2015 ## Sources ### Main git repository @@ -13,11 +14,49 @@ https://github.com/mariux/mxq ## Installation ### Install using `GNU make` ``` -make -make install [DESTDIR=...] +make +make install +``` +``` +make PREFIX=... +make PREFIX=... [DESTDIR=...] install ``` ### Install using `bee` ``` -bee init $(bee download git://github.molgen.mpg.de/mariux64/mxq.git) -e +bee init $(bee download git://github.molgen.mpg.de/mariux64/mxq.git) --execute bee update mxq ``` +``` +bee init $(bee download git://github.molgen.mpg.de/mariux64/mxq.git) --prefix=... --execute +bee update mxq +``` + +## Initial setup +Definitions of the tables and triggers for the MySQL database can be found in +[mysql/create_tables.sql](https://github.molgen.mpg.de/mariux64/mxq/blob/master/mysql/create_tables.sql) +Be sure to create those once and check the same +[directory for `alter_tables*.sql`](https://github.molgen.mpg.de/mariux64/mxq/blob/master/mysql/) +files when upgrading. + + +## Development builds +The `devel` target in the Makefile will enable all development features +by defining `MXQ_DEVELOPMENT` when compiling C sources. + +``` +make clean +make devel PREFIX=/path/to/test +make install PREFIX=/path/to/test +``` + +### Differences to production builds +Some new features and improvements are enabled in development builds. +Those features may not be tested for every situation yet and may result +in database corruption and/or failing jobs. 
+ +#### changed `mxqd` default options +In development builds `--no-log` is default (enable logging with `--log`) + +#### Development database access +Development builds default to use `[mxqdevel]` groups from mysql config files +for servers and clients (instead of the default `[mxqclient]` and `[mxqd]` groups) From cd27987b5d4153ad75586cc3ed67cf879e53d046 Mon Sep 17 00:00:00 2001 From: Marius Tolzmann Date: Thu, 22 Oct 2015 21:28:45 +0200 Subject: [PATCH 5/8] mx_proc: Add process tree related functions --- mx_proc.c | 350 +++++++++++++++++++++++++++++++++++++++++++++++++++++- mx_proc.h | 31 +++++ 2 files changed, 377 insertions(+), 4 deletions(-) diff --git a/mx_proc.c b/mx_proc.c index 2af1a5da..611cda8b 100644 --- a/mx_proc.c +++ b/mx_proc.c @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include "mx_util.h" #include "mx_proc.h" @@ -81,21 +83,57 @@ static int _mx_proc_pid_stat_strscan(char *str, struct mx_proc_pid_stat *pps) } int mx_proc_pid_stat(struct mx_proc_pid_stat **pps, pid_t pid) +{ + struct mx_proc_pid_stat *pstat; + int res; + + pstat = *pps; + if (!pstat) + pstat = mx_calloc_forever(1, sizeof(*pstat)); + + res = mx_proc_pid_stat_read(pstat, "/proc/%d/stat", pid); + if (res < 0) + return res; + + *pps = pstat; + return 0; +} + +int mx_proc_pid_task_tid_stat(struct mx_proc_pid_stat **pps, pid_t pid, pid_t tid) +{ + struct mx_proc_pid_stat *pstat; + int res; + + pstat = *pps; + if (!pstat) + pstat = mx_calloc_forever(1, sizeof(*pstat)); + + res = mx_proc_pid_stat_read(pstat, "/proc/%d/task/%d/stat", pid, tid); + if (res < 0) + return res; + + *pps = pstat; + return 0; +} + +int mx_proc_pid_stat_read(struct mx_proc_pid_stat *pps, char *fmt, ...) 
{ _mx_cleanup_free_ char *fname = NULL; _mx_cleanup_free_ char *line = NULL; + va_list ap; int res; - mx_asprintf_forever(&fname, "/proc/%d/stat", pid); + assert(pps); - if (!*pps) - *pps = mx_calloc_forever(1, sizeof(**pps)); + va_start(ap, fmt); + mx_vasprintf_forever(&fname, fmt, ap); + va_end(ap); res = mx_read_first_line_from_file(fname, &line); if (res < 0) return res; - res = _mx_proc_pid_stat_strscan(line, *pps); + res = _mx_proc_pid_stat_strscan(line, pps); if (res < 0) return res; @@ -109,3 +147,307 @@ void mx_proc_pid_stat_free_content(struct mx_proc_pid_stat *pps) mx_free_null(pps->comm); } + +static void mx_proc_tree_update_parent_pinfo(struct mx_proc_tree_node *this, struct mx_proc_info *pinfo) +{ + if (!this) + return; + + this->pinfo.sum_rss += pinfo->sum_rss; + + mx_proc_tree_update_parent_pinfo(this->parent, pinfo); +} + +static void mx_proc_tree_add_to_list_sorted(struct mx_proc_tree_node **ptn_ptr, struct mx_proc_tree_node *new) +{ + struct mx_proc_tree_node *current; + + assert(new); + assert(new->pinfo.pstat); + assert(!new->next); + assert(new->pinfo.pstat->pid > 0); + + current = *ptn_ptr; + + /* update stats */ + if (new->parent) { + new->parent->nchilds++; + mx_proc_tree_update_parent_pinfo(new->parent, &(new->pinfo)); + } + + /* empty list? 
-> start new list */ + if (!current) { + *ptn_ptr = new; + return; + } + + /* new is first entry */ + if (new->pinfo.pstat->pid < current->pinfo.pstat->pid) { + new->next = current; + *ptn_ptr = new; + return; + } + + /* find position */ + while (1) { + assert(new->pinfo.pstat->pid > current->pinfo.pstat->pid); + + /* new is last entry */ + if (!current->next) { + current->next = new; + break; + } + + assert(current->next->pinfo.pstat->pid > current->pinfo.pstat->pid); + + /* add new between current and current->next */ + if (new->pinfo.pstat->pid < current->next->pinfo.pstat->pid) { + new->next = current->next; + current->next = new; + break; + } + + current = current->next; + } + + return; +} + +static struct mx_proc_tree_node *mx_proc_tree_find_by_pid(struct mx_proc_tree_node *ptn, long long int pid) +{ + assert(ptn); + assert(pid >= 0); + + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *node; + + if (pid == 0) + return NULL; + + current = ptn; + + for (current = ptn; current; current=current->next) { + if (current->pinfo.pstat->pid == pid) + return current; + + if (!current->childs) + continue; + + node = mx_proc_tree_find_by_pid(current->childs, pid); + if (node) + return node; + } + + return NULL; +} + +#define ppid_or_pgrp(x) (((x)->ppid != 1 || (x)->pid == (x)->pgrp) ? (x)->ppid : (x)->pgrp) + +static struct mx_proc_tree_node *mx_proc_tree_add(struct mx_proc_tree *pt, struct mx_proc_pid_stat *pps) +{ + assert(pps); + assert(pt); + struct mx_proc_tree_node *new; + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *next; + struct mx_proc_tree_node *parent; + + new = mx_calloc_forever(1, sizeof(*new)); + + pt->nentries++; + + new->pinfo.pstat = pps; + new->pinfo.sum_rss = pps->rss; + + if (!(pt->root)) { + pt->root = new; + return new; + } + + assert(pt->root); + + /* new is second to last roots parent? 
-> collect */ + current = pt->root; + while (current->next) { + if (current->next->pinfo.pstat->ppid != new->pinfo.pstat->pid) { + current = current->next; + continue; + } + assert(current->next->pinfo.pstat->ppid == new->pinfo.pstat->pid); + + /* disconnect next */ + next = current->next; + current->next = current->next->next; + next->next = NULL; + + /* add as child of new */ + next->parent = new; + mx_proc_tree_add_to_list_sorted(&new->childs, next); + } + + /* new is first roots parent? -> new is new root */ + if (pt->root->pinfo.pstat->ppid == new->pinfo.pstat->pid) { + assert(!new->next); + + current = pt->root; + pt->root = pt->root->next; + + current->next = NULL; + current->parent = new; + + mx_proc_tree_add_to_list_sorted(&new->childs, current); + + if (!(pt->root)) { + pt->root = new; + return new; + } + } + + + parent = mx_proc_tree_find_by_pid(pt->root, new->pinfo.pstat->ppid); + if (parent) { + new->parent = parent; + mx_proc_tree_add_to_list_sorted(&parent->childs, new); + } else { + mx_proc_tree_add_to_list_sorted(&pt->root, new); + } + + return new; +} + +static void mx_proc_tree_reorder_roots(struct mx_proc_tree *pt) +{ + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *pid1 = NULL; + struct mx_proc_tree_node *last = NULL; + struct mx_proc_tree_node *next = NULL; + + for (current = pt->root; current; current = current->next) { + if (current->pinfo.pstat->pid == 1) { + pid1 = current; + break; + } + } + + if (!pid1) + return; + + for (current = pt->root; current; current = next) { + next = current->next; + + if (current->pinfo.pstat->ppid != 1) { + last = current; + continue; + } + + if (!last) { + if (!current->next) + return; + pt->root = current->next; + } else { + last->next = current->next; + } + current->next = NULL; + current->parent = pid1; + mx_proc_tree_add_to_list_sorted(&pid1->childs, current); + } +} + +static int _mx_filter_numbers(const struct dirent *d) +{ + if (!isdigit((unsigned char)d->d_name[0])) + return 0; + + return 1; +} + +int 
mx_proc_tree(struct mx_proc_tree **newtree) +{ + struct mx_proc_tree *pt; + struct dirent **namelist = NULL; + struct mx_proc_pid_stat *pps; + int n; + int i; + int res; + unsigned long long int pid; + + assert(*newtree == NULL); + + pt = mx_calloc_forever(1, sizeof(*pt)); + + n = scandir("/proc", &namelist, _mx_filter_numbers, NULL); + if (n < 0) + return -errno; + + if (n == 0) + return -(errno=ENOENT); + + for (i=0; i < n; i++) { + res = mx_strtoull(namelist[i]->d_name, &pid); + free(namelist[i]); + if (res < 0) + continue; + + pps = NULL; + res = mx_proc_pid_stat(&pps, pid); + if (res < 0) + continue; + + mx_proc_tree_add(pt, pps); + } + free(namelist); + + mx_proc_tree_reorder_roots(pt); + + *newtree = pt; + return 0; +} + +static void _mx_proc_tree_node_free_recursive(struct mx_proc_tree_node *ptn) +{ + assert(ptn); + + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *next; + + for (current = ptn; current; current=next) { + + if (current->childs) + _mx_proc_tree_node_free_recursive(current->childs); + + next = current->next; + + mx_proc_pid_stat_free_content(current->pinfo.pstat); + mx_free_null(current->pinfo.pstat); + mx_free_null(current); + } + + return; +} + +int mx_proc_tree_free(struct mx_proc_tree **tree) +{ + struct mx_proc_tree *pt; + + pt = *tree; + + _mx_proc_tree_node_free_recursive(pt->root); + + mx_free_null(*tree); + + return 0; +} + +struct mx_proc_info *mx_proc_tree_proc_info(struct mx_proc_tree *tree, pid_t pid) +{ + struct mx_proc_tree_node *ptn; + + assert(tree); + + ptn = mx_proc_tree_find_by_pid(tree->root, pid); + + if (!ptn) + return NULL; + + return &(ptn->pinfo); +} diff --git a/mx_proc.h b/mx_proc.h index 4a0efb8e..9f16806e 100644 --- a/mx_proc.h +++ b/mx_proc.h @@ -3,6 +3,29 @@ #include +struct mx_proc_info { + struct mx_proc_pid_stat *pstat; + + unsigned long long int sum_rss; + + char **environment; +}; + +struct mx_proc_tree { + struct mx_proc_tree_node *root; + int nentries; +}; + +struct mx_proc_tree_node { + 
struct mx_proc_tree_node *parent; + struct mx_proc_tree_node *next; + + struct mx_proc_info pinfo; + + unsigned long long int nchilds; + struct mx_proc_tree_node *childs; +}; + struct mx_proc_pid_stat { long long int pid; /* 1 */ char *comm; /* 2 (comm) */ @@ -51,8 +74,16 @@ struct mx_proc_pid_stat { }; int mx_proc_pid_stat_read(struct mx_proc_pid_stat *pps, char *fmt, ...); + int mx_proc_pid_stat(struct mx_proc_pid_stat **pps, pid_t pid); +int mx_proc_pid_task_tid_stat(struct mx_proc_pid_stat **pps, pid_t pid, pid_t tid); void mx_proc_pid_stat_free_content(struct mx_proc_pid_stat *pps); +int mx_proc_tree(struct mx_proc_tree **newtree); + +int mx_proc_tree_free(struct mx_proc_tree **tree); + +struct mx_proc_info *mx_proc_tree_proc_info(struct mx_proc_tree *tree, pid_t pid); + #endif From 0a6148d3af69c52c5cba3e5e8fc33cd24baef955 Mon Sep 17 00:00:00 2001 From: Marius Tolzmann Date: Thu, 22 Oct 2015 16:01:47 +0200 Subject: [PATCH 6/8] mxqps: Add new tool to list mxqd processes --- .gitignore | 2 ++ Makefile | 15 +++++++++ mxqps.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 mxqps.c diff --git a/.gitignore b/.gitignore index f466977c..4509600b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ mx_flock.o mx_log.o mx_util.o mx_proc.o +mxqps.o mxq_group.o mxqadmin.o mxqdump.o @@ -22,6 +23,7 @@ mxqadmin mxqdump mxqkill mxqd +mxqps test_mx_util test_mx_log test_mx_mysql diff --git a/Makefile b/Makefile index ca31600f..a1a38766 100644 --- a/Makefile +++ b/Makefile @@ -495,6 +495,21 @@ clean: CLEAN += mxqkill install:: mxqkill $(call quiet-installforuser,$(SUID_MODE),$(UID_CLIENT),$(GID_CLIENT),mxqkill,${DESTDIR}${BINDIR}/mxqkill) +### mxqps ------------------------------------------------------------- + +mxqps.o: $(mx_proc.h) +mxqps.o: $(mx_util.h) + +clean: CLEAN += mxqps.o + +mxqps: mx_log.o +mxqps: mx_util.o +mxqps: mx_proc.o + +build: mxqps + +clean: CLEAN += mxqps + 
######################################################################## fix: FIX += mxqdctl-hostconfig.sh diff --git a/mxqps.c b/mxqps.c new file mode 100644 index 00000000..5088edc0 --- /dev/null +++ b/mxqps.c @@ -0,0 +1,94 @@ + +#include +#include +#include +#include +#include +#include + +#include "mx_util.h" +#include "mx_log.h" +#include "mx_proc.h" + + +int filter(const struct dirent *d) +{ + if (!isdigit(d->d_name[0])) + return 0; + + return 1; +} + +#define MX_PROC_TREE_NODE_IS_KERNEL_THREAD(x) ((x)->pinfo.pstat->ppid == 0 && (x)->pinfo.sum_rss == 0) + +int mx_proc_tree_node_print_debug(struct mx_proc_tree_node *ptn, int lvl) +{ + assert(ptn); + + struct mx_proc_tree_node *current; + + current = ptn; + + long pagesize; + + pagesize = sysconf(_SC_PAGESIZE); + assert(pagesize); + + for (current = ptn; current; current=current->next) { + if (MX_PROC_TREE_NODE_IS_KERNEL_THREAD(current)) + continue; + + printf("%7lld %7lld %7lld %7lld %15lld %15lld %7lld", + current->pinfo.pstat->pid, + current->pinfo.pstat->ppid, + current->pinfo.pstat->pgrp, + current->pinfo.pstat->session, + current->pinfo.pstat->rss*pagesize/1024, + current->pinfo.sum_rss*pagesize/1024, + current->pinfo.pstat->num_threads); + + if (lvl>0) + printf("%*s", lvl*4, "\\_"); + assert(current->pinfo.pstat); + printf(" %s\n", current->pinfo.pstat->comm); + + if (!current->childs) + continue; + + mx_proc_tree_node_print_debug(current->childs, lvl+(current->parent != NULL)); + } + + return 0; +} + +int mx_proc_tree_print_debug(struct mx_proc_tree *pt) +{ + assert(pt); + printf("%7s %7s %7s %7s %15s %15s %7s COMMAND\n", + "PID", + "PPID", + "PGRP", + "SESSION", + "RSS", + "SUMRSS", + "THREADS"); + mx_proc_tree_node_print_debug(pt->root, 0); + return 0; +} + +int main(void) +{ + int res; + struct mx_proc_tree *pt = NULL; + + res = mx_proc_tree(&pt); + assert(res == 0); + + mx_proc_tree_print_debug(pt); + + mx_proc_tree_free(&pt); + + + return 0; + +} From 875002aead967904321a318754e7ce5dc38d46fa Mon 
Sep 17 00:00:00 2001 From: Marius Tolzmann Date: Fri, 23 Oct 2015 14:41:23 +0200 Subject: [PATCH 7/8] mx_proc: Use pgrp in addition to ppid to group processes --- mx_proc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mx_proc.c b/mx_proc.c index 611cda8b..1739ae84 100644 --- a/mx_proc.c +++ b/mx_proc.c @@ -269,11 +269,11 @@ static struct mx_proc_tree_node *mx_proc_tree_add(struct mx_proc_tree *pt, struc /* new is second to last roots parent? -> collect */ current = pt->root; while (current->next) { - if (current->next->pinfo.pstat->ppid != new->pinfo.pstat->pid) { + if (ppid_or_pgrp(current->next->pinfo.pstat) != new->pinfo.pstat->pid) { current = current->next; continue; } - assert(current->next->pinfo.pstat->ppid == new->pinfo.pstat->pid); + assert(ppid_or_pgrp(current->next->pinfo.pstat) == new->pinfo.pstat->pid); /* disconnect next */ next = current->next; @@ -286,7 +286,7 @@ static struct mx_proc_tree_node *mx_proc_tree_add(struct mx_proc_tree *pt, struc } /* new is first roots parent? 
-> new is new root */ - if (pt->root->pinfo.pstat->ppid == new->pinfo.pstat->pid) { + if (ppid_or_pgrp(pt->root->pinfo.pstat)== new->pinfo.pstat->pid) { assert(!new->next); current = pt->root; @@ -303,8 +303,7 @@ static struct mx_proc_tree_node *mx_proc_tree_add(struct mx_proc_tree *pt, struc } } - - parent = mx_proc_tree_find_by_pid(pt->root, new->pinfo.pstat->ppid); + parent = mx_proc_tree_find_by_pid(pt->root, ppid_or_pgrp(new->pinfo.pstat)); if (parent) { new->parent = parent; mx_proc_tree_add_to_list_sorted(&parent->childs, new); From 0381a76040573368b8c0d3df5da5e5e7e2286645 Mon Sep 17 00:00:00 2001 From: Marius Tolzmann Date: Fri, 23 Oct 2015 23:37:17 +0200 Subject: [PATCH 8/8] mxqd: Kill jobs over memory at most every 60 seconds this resolves https://github.molgen.mpg.de/mariux64/mxq/issues/16 --- mxqd.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ mxqd.h | 2 ++ 2 files changed, 67 insertions(+) diff --git a/mxqd.c b/mxqd.c index 09b8c5ba..b15d558f 100644 --- a/mxqd.c +++ b/mxqd.c @@ -1574,6 +1574,69 @@ int killall_over_time(struct mxq_server *server) return 0; } +int killall_over_memory(struct mxq_server *server) +{ + struct mxq_user_list *user; + struct mxq_group_list *group; + struct mxq_job_list *job; + struct mx_proc_tree *pt = NULL; + struct mx_proc_info *pinfo; + long pagesize; + pid_t pid; + unsigned long long int memory; + int res; + + assert(server); + + if (!server->jobs_running) + return 0; + + /* limit killing to every >= 10 seconds */ + mx_within_rate_limit_or_return(10, 0); + + pagesize = sysconf(_SC_PAGESIZE); + if (!pagesize) { + mx_log_warning("killall_over_memory(): Can't get _SC_PAGESIZE. 
Assuming 4096."); + pagesize = 4096; + } + + res = mx_proc_tree(&pt); + if (res < 0) { + mx_log_err("killall_over_memory(): Reading process tree failed: %m"); + return res; + } + + for (user=server->users; user; user=user->next) { + for (group=user->groups; group; group=group->next) { + for (job=group->jobs; job; job=job->next) { + pid = job->job.host_pid; + + pinfo = mx_proc_tree_proc_info(pt, pid); + if (!pinfo) { + mx_log_warning("killall_over_memory(): Can't find process with pid %llu in process tree", pid); + continue; + } + + memory = pinfo->sum_rss * pagesize / 1024 / 1024; + + if (job->max_sum_rss < memory) + job->max_sum_rss = memory; + + if (memory <= group->group.job_memory) + continue; + + mx_log_info("killall_over_memory(): used(%llu) > requested(%llu): Sending signal=KILL to job=%s(%d):%lu:%lu pgrp=%d", + memory, group->group.job_memory, + group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid); + + kill(-pid, SIGKILL); + } + } + } + mx_proc_tree_free(&pt); + return 0; +} + int killallcancelled(struct mxq_server *server, int sig, unsigned int pgrp) { struct mxq_user_list *user; @@ -1867,6 +1930,7 @@ int main(int argc, char *argv[]) killallcancelled(&server, SIGTERM, 0); killallcancelled(&server, SIGINT, 0); killall_over_time(&server); + killall_over_memory(&server); if (!server.group_cnt) { assert(!server.jobs_running); @@ -1914,6 +1978,7 @@ int main(int argc, char *argv[]) killallcancelled(&server, SIGTERM, 0); killallcancelled(&server, SIGINT, 0); killall_over_time(&server); + killall_over_memory(&server); mx_log_info("jobs_running=%lu global_sigint_cnt=%d global_sigterm_cnt=%d : Exiting. Wating for jobs to finish. Sleeping for a while.", server.jobs_running, global_sigint_cnt, global_sigterm_cnt); diff --git a/mxqd.h b/mxqd.h index 4de72d44..4bc1cd0a 100644 --- a/mxqd.h +++ b/mxqd.h @@ -10,6 +10,8 @@ struct mxq_job_list { struct mxq_job job; + unsigned long long int max_sum_rss; + pid_t pid; };