diff --git a/.gitignore b/.gitignore index d6af43fc..4509600b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ mx_getopt.o mx_flock.o mx_log.o mx_util.o +mx_proc.o +mxqps.o mxq_group.o mxqadmin.o mxqdump.o @@ -21,6 +23,7 @@ mxqadmin mxqdump mxqkill mxqd +mxqps test_mx_util test_mx_log test_mx_mysql diff --git a/Makefile b/Makefile index 716bc3f3..1d1bd5a2 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ MXQ_VERSION_MAJOR = 0 -MXQ_VERSION_MINOR = 17 -MXQ_VERSION_PATCH = 0 +MXQ_VERSION_MINOR = 18 +MXQ_VERSION_PATCH = 2 MXQ_VERSION_EXTRA = "beta" -MXQ_VERSIONDATE = 2013-2015 +MXQ_VERSIONDATE = 2015 MXQ_VERSION_GIT := $(shell git describe --long 2>/dev/null) @@ -25,6 +25,7 @@ LIBEXECDIR = ${EPREFIX}/libexec DATADIR = ${PREFIX}/share MANDIR = ${DATADIR}/man SYSCONFDIR = ${PREFIX}/etc +LOCALSTATEDIR = ${PREFIX}/var DESTDIR= @@ -43,6 +44,11 @@ ifneq (, $(filter /usr /usr/local, ${PREFIX})) SYSCONFDIR = /etc endif +### set localstatedir /var if prefix /usr || /usr/local +ifneq (, $(filter /usr /usr/local, ${PREFIX})) + LOCALSTATEDIR = /var +endif + ######################################################################## ### strip /mxq from SYSCONFDIR if set @@ -60,13 +66,17 @@ CGIDIR = ${LIBEXECDIR}/mxq/cgi ######################################################################## MXQ_MYSQL_DEFAULT_FILE = ${SYSCONFDIR}/mxq/mysql.cnf -MXQ_MYSQL_DEFAULT_GROUP = mxqclient +MXQ_MYSQL_DEFAULT_GROUP_CLIENT = mxqclient +MXQ_MYSQL_DEFAULT_GROUP_SERVER = mxqd +MXQ_MYSQL_DEFAULT_GROUP_DEVELOPMENT = mxqdevel MXQ_INITIAL_PATH = /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin MXQ_INITIAL_TMPDIR = /tmp CFLAGS_MXQ_MYSQL_DEFAULT_FILE = -DMXQ_MYSQL_DEFAULT_FILE=\"$(MXQ_MYSQL_DEFAULT_FILE)\" -CFLAGS_MXQ_MYSQL_DEFAULT_GROUP = -DMXQ_MYSQL_DEFAULT_GROUP=\"$(MXQ_MYSQL_DEFAULT_GROUP)\" +CFLAGS_MXQ_MYSQL_DEFAULT_GROUP = -DMXQ_MYSQL_DEFAULT_GROUP_CLIENT=\"$(MXQ_MYSQL_DEFAULT_GROUP_CLIENT)\" +CFLAGS_MXQ_MYSQL_DEFAULT_GROUP += 
-DMXQ_MYSQL_DEFAULT_GROUP_SERVER=\"$(MXQ_MYSQL_DEFAULT_GROUP_SERVER)\" +CFLAGS_MXQ_MYSQL_DEFAULT_GROUP += -DMXQ_MYSQL_DEFAULT_GROUP_DEVELOPMENT=\"$(MXQ_MYSQL_DEFAULT_GROUP_DEVELOPMENT)\" CFLAGS_MXQ_INITIAL_PATH = -DMXQ_INITIAL_PATH=\"$(MXQ_INITIAL_PATH)\" CFLAGS_MXQ_INITIAL_TMPDIR = -DMXQ_INITIAL_TMPDIR=\"$(MXQ_INITIAL_TMPDIR)\" @@ -121,6 +131,7 @@ CFLAGS += -DLIBEXECDIR=\"${LIBEXECDIR}\" CFLAGS += -DDATADIR=\"${DATADIR}\" CFLAGS += -DMANDIR=\"${MANDIR}\" CFLAGS += -DSYSCONFDIR=\"${SYSCONFDIR}\" +CFLAGS += -DLOCALSTATEDIR=\"${LOCALSTATEDIR}\" CFLAGS += $(EXTRA_CFLAGS) ######################################################################## @@ -182,11 +193,11 @@ manpages/%: manpages/%.xml .PHONY: all .PHONY: build -all: build +all: build test -.PHONY: nonroot -nonroot: CFLAGS += -DRUNASNORMALUSER -nonroot: all +.PHONY: devel +devel: CFLAGS += -DMXQ_DEVELOPMENT +devel: all ######################################################################## @@ -255,6 +266,10 @@ mx_log.h += mx_log.h mx_util.h += mx_util.h +### mx_proc.h ---------------------------------------------------------- + +mx_proc.h += mx_proc.h + ### mx_flock.h --------------------------------------------------------- mx_flock.h += mx_flock.h @@ -305,6 +320,12 @@ mx_util.o: $(mx_log.h) clean: CLEAN += mx_util.o +### mx_proc ------------------------------------------------------------ + +mx_proc.o: $(mx_proc.h) + +clean: CLEAN += mx_proc.o + ### mx_flock.o --------------------------------------------------------- mx_flock.o: $(mx_flock.h) @@ -386,6 +407,7 @@ clean: CLEAN += mxq_job.o mxqd.o: $(mx_getopt.h) mxqd.o: $(mx_flock.h) mxqd.o: $(mx_util.h) +mxqd.o: $(mx_proc.h) mxqd.o: $(mx_log.h) mxqd.o: $(mxqd.h) mxqd.o: $(mxq_group.h) @@ -418,6 +440,7 @@ clean: CLEAN += mxqsub.o mxqd: mx_flock.o mxqd: mx_util.o +mxqd: mx_proc.o mxqd: mx_log.o mxqd: mxq_log.o mxqd: mx_getopt.o @@ -496,6 +519,21 @@ clean: CLEAN += mxqkill install:: mxqkill $(call 
quiet-installforuser,$(SUID_MODE),$(UID_CLIENT),$(GID_CLIENT),mxqkill,${DESTDIR}${BINDIR}/mxqkill) +### mxqps ------------------------------------------------------------- + +mxqps.o: $(mx_proc.h) +mxqps.o: $(mx_util.h) + +clean: CLEAN += mxqps.o + +mxqps: mx_log.o +mxqps: mx_util.o +mxqps: mx_proc.o + +build: mxqps + +clean: CLEAN += mxqps + ######################################################################## fix: FIX += mxqdctl-hostconfig.sh @@ -535,6 +573,7 @@ test_mx_util.o: $(mx_util.h) clean: CLEAN += test_mx_util.o test_mx_util: mx_util.o +test_mx_util: mx_proc.o test_mx_util: mx_log.o clean: CLEAN += test_mx_util diff --git a/README.md b/README.md index fb37fe79..2e3f3970 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ -# mxq -MXQ - mariux64 job scheduling system +# MXQ - mariux64 job scheduling system +- by Marius Tolzmann 2013-2015 +- and Donald Buczek 2015-2015 ## Sources ### Main git repository @@ -13,11 +14,56 @@ https://github.com/mariux/mxq ## Installation ### Install using `GNU make` ``` -make -make install [DESTDIR=...] +make install +``` + +or to specify a prefix: +``` +make PREFIX=... [DESTDIR=...] install ``` ### Install using `bee` ``` -bee init $(bee download git://github.molgen.mpg.de/mariux64/mxq.git) -e +bee init $(bee download git://github.molgen.mpg.de/mariux64/mxq.git) --execute bee update mxq ``` + +or to specify a prefix +``` +bee init $(bee download git://github.molgen.mpg.de/mariux64/mxq.git) --prefix=... --execute +bee update mxq +``` + +## Initial setup +Definitions of the tables for the MySQL database can be found in +[mysql/create_tables.sql](https://github.molgen.mpg.de/mariux64/mxq/blob/master/mysql/create_tables.sql) +and +[mysql/create_trigger.sql](https://github.molgen.mpg.de/mariux64/mxq/blob/master/mysql/create_trigger.sql) +Be sure to create those once and check the same +[directory for alter_tables*.sql`](https://github.molgen.mpg.de/mariux64/mxq/blob/master/mysql/) +files when upgrading. 
+``` +mysql [options] [database] +#include +#include +#include +#include +#include + +#include "mx_util.h" +#include "mx_proc.h" + +static int _mx_proc_pid_stat_strscan(char *str, struct mx_proc_pid_stat *pps) +{ + size_t res = 0; + char *p; + char *s; + + pps->comm = NULL; + + s = str; + + res += mx_strscan_ll(&s, &(pps->pid)); + + p = strrchr(s, ')'); + if (!p) + return -(errno=EINVAL); + + *p = 0; + s++; + + pps->comm = mx_strdup_forever(s); + s = p + 2; + + pps->state = *s; + res += !(*(s+1) == ' '); + s += 2; + + res += mx_strscan_ll(&s, &(pps->ppid)); + res += mx_strscan_ll(&s, &(pps->pgrp)); + res += mx_strscan_ll(&s, &(pps->session)); + res += mx_strscan_ll(&s, &(pps->tty_nr)); + res += mx_strscan_ll(&s, &(pps->tpgid)); + res += mx_strscan_ull(&s, &(pps->flags)); + res += mx_strscan_ull(&s, &(pps->minflt)); + res += mx_strscan_ull(&s, &(pps->cminflt)); + res += mx_strscan_ull(&s, &(pps->majflt)); + res += mx_strscan_ull(&s, &(pps->cmajflt)); + res += mx_strscan_ull(&s, &(pps->utime)); + res += mx_strscan_ull(&s, &(pps->stime)); + res += mx_strscan_ll(&s, &(pps->cutime)); + res += mx_strscan_ll(&s, &(pps->cstime)); + res += mx_strscan_ll(&s, &(pps->priority)); + res += mx_strscan_ll(&s, &(pps->nice)); + res += mx_strscan_ll(&s, &(pps->num_threads)); + res += mx_strscan_ll(&s, &(pps->itrealvalue)); + res += mx_strscan_ull(&s, &(pps->starttime)); + res += mx_strscan_ull(&s, &(pps->vsize)); + res += mx_strscan_ll(&s, &(pps->rss)); + res += mx_strscan_ull(&s, &(pps->rsslim)); + res += mx_strscan_ull(&s, &(pps->startcode)); + res += mx_strscan_ull(&s, &(pps->endcode)); + res += mx_strscan_ull(&s, &(pps->startstack)); + res += mx_strscan_ull(&s, &(pps->kstkesp)); + res += mx_strscan_ull(&s, &(pps->kstkeip)); + res += mx_strscan_ull(&s, &(pps->signal)); + res += mx_strscan_ull(&s, &(pps->blocked)); + res += mx_strscan_ull(&s, &(pps->sigignore)); + res += mx_strscan_ull(&s, &(pps->sigcatch)); + res += mx_strscan_ull(&s, &(pps->wchan)); + res += mx_strscan_ull(&s, 
&(pps->nswap));
+    res += mx_strscan_ull(&s, &(pps->cnswap));
+    res += mx_strscan_ll(&s, &(pps->exit_signal));
+    res += mx_strscan_ll(&s, &(pps->processor));
+    res += mx_strscan_ull(&s, &(pps->rt_priority));
+    res += mx_strscan_ull(&s, &(pps->policy));
+    res += mx_strscan_ull(&s, &(pps->delayacct_blkio_ticks));
+    res += mx_strscan_ull(&s, &(pps->guest_time));
+    res += mx_strscan_ll(&s, &(pps->cguest_time));
+
+    if (res != 0) {
+        /* comm was duplicated earlier in this function; release it so a
+         * failed parse does not leak it and no stale pointer is left behind */
+        mx_free_null(pps->comm);
+        return -(errno=EINVAL);
+    }
+
+    return 0;
+}
+
+/*
+ * Read and parse /proc/<pid>/stat.
+ *
+ * pps: in/out. When *pps is NULL a zeroed structure is allocated and, on
+ *      success, stored in *pps. When *pps is non-NULL that structure is
+ *      filled in place.
+ * pid: process id used to build the path.
+ *
+ * Returns 0 on success or the negative result of mx_proc_pid_stat_read().
+ * On failure *pps is left unchanged; a structure allocated here is released
+ * again, a caller-provided one is never freed.
+ */
+int mx_proc_pid_stat(struct mx_proc_pid_stat **pps, pid_t pid)
+{
+    struct mx_proc_pid_stat *pstat;
+    int res;
+
+    pstat = *pps;
+    if (!pstat)
+        pstat = mx_calloc_forever(1, sizeof(*pstat));
+
+    res = mx_proc_pid_stat_read(pstat, "/proc/%d/stat", pid);
+    if (res < 0) {
+        /* *pps still holds the caller's original value: NULL means the
+         * buffer was allocated above and nobody else can free it */
+        if (!*pps) {
+            mx_proc_pid_stat_free_content(pstat);
+            mx_free_null(pstat);
+        }
+        return res;
+    }
+
+    *pps = pstat;
+    return 0;
+}
+
+/*
+ * Read and parse /proc/<pid>/task/<tid>/stat.
+ *
+ * Same contract as mx_proc_pid_stat(): *pps may be NULL (a structure is
+ * allocated) or point to a structure to fill; on failure *pps is left
+ * unchanged and a structure allocated here is released again.
+ *
+ * Returns 0 on success or the negative result of mx_proc_pid_stat_read().
+ */
+int mx_proc_pid_task_tid_stat(struct mx_proc_pid_stat **pps, pid_t pid, pid_t tid)
+{
+    struct mx_proc_pid_stat *pstat;
+    int res;
+
+    pstat = *pps;
+    if (!pstat)
+        pstat = mx_calloc_forever(1, sizeof(*pstat));
+
+    res = mx_proc_pid_stat_read(pstat, "/proc/%d/task/%d/stat", pid, tid);
+    if (res < 0) {
+        /* only release what was allocated in this call */
+        if (!*pps) {
+            mx_proc_pid_stat_free_content(pstat);
+            mx_free_null(pstat);
+        }
+        return res;
+    }
+
+    *pps = pstat;
+    return 0;
+}
+
+int mx_proc_pid_stat_read(struct mx_proc_pid_stat *pps, char *fmt, ...)
+{ + _mx_cleanup_free_ char *fname = NULL; + _mx_cleanup_free_ char *line = NULL; + va_list ap; + int res; + + assert(pps); + + va_start(ap, fmt); + mx_vasprintf_forever(&fname, fmt, ap); + va_end(ap); + + res = mx_read_first_line_from_file(fname, &line); + if (res < 0) + return res; + + res = _mx_proc_pid_stat_strscan(line, pps); + if (res < 0) + return res; + + return 0; +} + +void mx_proc_pid_stat_free_content(struct mx_proc_pid_stat *pps) +{ + if (!pps) + return; + + mx_free_null(pps->comm); +} + +static void mx_proc_tree_update_parent_pinfo(struct mx_proc_tree_node *this, struct mx_proc_info *pinfo) +{ + if (!this) + return; + + this->pinfo.sum_rss += pinfo->sum_rss; + + mx_proc_tree_update_parent_pinfo(this->parent, pinfo); +} + +static void mx_proc_tree_add_to_list_sorted(struct mx_proc_tree_node **ptn_ptr, struct mx_proc_tree_node *new) +{ + struct mx_proc_tree_node *current; + + assert(new); + assert(new->pinfo.pstat); + assert(!new->next); + assert(new->pinfo.pstat->pid > 0); + + current = *ptn_ptr; + + /* update stats */ + if (new->parent) { + new->parent->nchilds++; + mx_proc_tree_update_parent_pinfo(new->parent, &(new->pinfo)); + } + + /* empty list? 
-> start new list */ + if (!current) { + *ptn_ptr = new; + return; + } + + /* new is first entry */ + if (new->pinfo.pstat->pid < current->pinfo.pstat->pid) { + new->next = current; + *ptn_ptr = new; + return; + } + + /* find position */ + while (1) { + assert(new->pinfo.pstat->pid > current->pinfo.pstat->pid); + + /* new is last entry */ + if (!current->next) { + current->next = new; + break; + } + + assert(current->next->pinfo.pstat->pid > current->pinfo.pstat->pid); + + /* add new between current and current->next */ + if (new->pinfo.pstat->pid < current->next->pinfo.pstat->pid) { + new->next = current->next; + current->next = new; + break; + } + + current = current->next; + } + + return; +} + +static struct mx_proc_tree_node *mx_proc_tree_find_by_pid(struct mx_proc_tree_node *ptn, long long int pid) +{ + assert(ptn); + assert(pid >= 0); + + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *node; + + if (pid == 0) + return NULL; + + current = ptn; + + for (current = ptn; current; current=current->next) { + if (current->pinfo.pstat->pid == pid) + return current; + + if (!current->childs) + continue; + + node = mx_proc_tree_find_by_pid(current->childs, pid); + if (node) + return node; + } + + return NULL; +} + +#define ppid_or_pgrp(x) (((x)->ppid != 1 || (x)->pid == (x)->pgrp) ? (x)->ppid : (x)->pgrp) + +static struct mx_proc_tree_node *mx_proc_tree_add(struct mx_proc_tree *pt, struct mx_proc_pid_stat *pps) +{ + assert(pps); + assert(pt); + struct mx_proc_tree_node *new; + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *next; + struct mx_proc_tree_node *parent; + + new = mx_calloc_forever(1, sizeof(*new)); + + pt->nentries++; + + new->pinfo.pstat = pps; + new->pinfo.sum_rss = pps->rss; + + if (!(pt->root)) { + pt->root = new; + return new; + } + + assert(pt->root); + + /* new is second to last roots parent? 
-> collect */ + current = pt->root; + while (current->next) { + if (ppid_or_pgrp(current->next->pinfo.pstat) != new->pinfo.pstat->pid) { + current = current->next; + continue; + } + assert(ppid_or_pgrp(current->next->pinfo.pstat) == new->pinfo.pstat->pid); + + /* disconnect next */ + next = current->next; + current->next = current->next->next; + next->next = NULL; + + /* add as child of new */ + next->parent = new; + mx_proc_tree_add_to_list_sorted(&new->childs, next); + } + + /* new is first roots parent? -> new is new root */ + if (ppid_or_pgrp(pt->root->pinfo.pstat)== new->pinfo.pstat->pid) { + assert(!new->next); + + current = pt->root; + pt->root = pt->root->next; + + current->next = NULL; + current->parent = new; + + mx_proc_tree_add_to_list_sorted(&new->childs, current); + + if (!(pt->root)) { + pt->root = new; + return new; + } + } + + parent = mx_proc_tree_find_by_pid(pt->root, ppid_or_pgrp(new->pinfo.pstat)); + if (parent) { + new->parent = parent; + mx_proc_tree_add_to_list_sorted(&parent->childs, new); + } else { + mx_proc_tree_add_to_list_sorted(&pt->root, new); + } + + return new; +} + +static void mx_proc_tree_reorder_roots(struct mx_proc_tree *pt) +{ + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *pid1; + struct mx_proc_tree_node *last = NULL; + struct mx_proc_tree_node *next = NULL; + + for (current = pt->root; current; current = current->next) { + if (current->pinfo.pstat->pid == 1) { + pid1 = current; + break; + } + } + + if (!pid1) + return; + + for (current = pt->root; current; current = next) { + next = current->next; + + if (current->pinfo.pstat->ppid != 1) { + last = current; + continue; + } + + if (!last) { + if (!current->next) + return; + pt->root = current->next; + } else { + last->next = current->next; + } + current->next = NULL; + current->parent = pid1; + mx_proc_tree_add_to_list_sorted(&pid1->childs, current); + } +} + +static int _mx_filter_numbers(const struct dirent *d) +{ + if (!isdigit(d->d_name[0])) + return 0; 
+ + return 1; +} + +int mx_proc_tree(struct mx_proc_tree **newtree) +{ + struct mx_proc_tree *pt; + struct dirent **namelist = NULL; + struct mx_proc_pid_stat *pps; + int n; + int i; + int res; + unsigned long long int pid; + + assert(*newtree == NULL); + + pt = mx_calloc_forever(1, sizeof(*pt)); + + n = scandir("/proc", &namelist, _mx_filter_numbers, NULL); + if (n < 0) + return -errno; + + if (n == 0) + return -(errno=ENOENT); + + for (i=0; i < n; i++) { + res = mx_strtoull(namelist[i]->d_name, &pid); + free(namelist[i]); + if (res < 0) + continue; + + pps = NULL; + res = mx_proc_pid_stat(&pps, pid); + if (res < 0) + continue; + + mx_proc_tree_add(pt, pps); + } + free(namelist); + + mx_proc_tree_reorder_roots(pt); + + *newtree = pt; + return 0; +} + +static void _mx_proc_tree_node_free_recursive(struct mx_proc_tree_node *ptn) +{ + assert(ptn); + + struct mx_proc_tree_node *current; + struct mx_proc_tree_node *next; + + for (current = ptn; current; current=next) { + + if (current->childs) + _mx_proc_tree_node_free_recursive(current->childs); + + next = current->next; + + mx_proc_pid_stat_free_content(current->pinfo.pstat); + mx_free_null(current->pinfo.pstat); + mx_free_null(current); + } + + return; +} + +int mx_proc_tree_free(struct mx_proc_tree **tree) +{ + struct mx_proc_tree *pt; + + pt = *tree; + + _mx_proc_tree_node_free_recursive(pt->root); + + mx_free_null(*tree); + + return 0; +} + +struct mx_proc_info *mx_proc_tree_proc_info(struct mx_proc_tree *tree, pid_t pid) +{ + struct mx_proc_tree_node *ptn; + + assert(tree); + + ptn = mx_proc_tree_find_by_pid(tree->root, pid); + + if (!ptn) + return NULL; + + return &(ptn->pinfo); +} diff --git a/mx_proc.h b/mx_proc.h new file mode 100644 index 00000000..9f16806e --- /dev/null +++ b/mx_proc.h @@ -0,0 +1,89 @@ +#ifndef __MX_PROC_H__ +#define __MX_PROC_H__ 1 + +#include + +struct mx_proc_info { + struct mx_proc_pid_stat *pstat; + + unsigned long long int sum_rss; + + char **environment; +}; + +struct mx_proc_tree { 
+ struct mx_proc_tree_node *root; + int nentries; +}; + +struct mx_proc_tree_node { + struct mx_proc_tree_node *parent; + struct mx_proc_tree_node *next; + + struct mx_proc_info pinfo; + + unsigned long long int nchilds; + struct mx_proc_tree_node *childs; +}; + +struct mx_proc_pid_stat { + long long int pid; /* 1 */ + char *comm; /* 2 (comm) */ + char state; /* 3 "RSDZTW" */ + long long int ppid; /* 4 */ + long long int pgrp; /* 5 */ + long long int session; /* 6 */ + long long int tty_nr; /* 7 */ + long long int tpgid; /* 8 */ + unsigned long long int flags; /* 9 */ + unsigned long long int minflt; /* 10 */ + unsigned long long int cminflt; /* 11 */ + unsigned long long int majflt; /* 12 */ + unsigned long long int cmajflt; /* 13 */ + unsigned long long int utime; /* 14 */ + unsigned long long int stime; /* 15 */ + long long int cutime; /* 16 */ + long long int cstime; /* 17 */ + long long int priority; /* 18 */ + long long int nice; /* 19 */ + long long int num_threads; /* 20 */ + long long int itrealvalue; /* 21 */ + unsigned long long int starttime; /* 22 */ + unsigned long long int vsize; /* 23 */ + long long int rss; /* 24 */ + unsigned long long int rsslim; /* 25 */ + unsigned long long int startcode; /* 26 */ + unsigned long long int endcode; /* 27 */ + unsigned long long int startstack; /* 28 */ + unsigned long long int kstkesp; /* 29 */ + unsigned long long int kstkeip; /* 30 */ + unsigned long long int signal; /* 31 */ + unsigned long long int blocked; /* 32 */ + unsigned long long int sigignore; /* 33 */ + unsigned long long int sigcatch; /* 34 */ + unsigned long long int wchan; /* 35 */ + unsigned long long int nswap; /* 36 */ + unsigned long long int cnswap; /* 37 */ + long long int exit_signal; /* 38 */ + long long int processor; /* 39 */ + unsigned long long int rt_priority; /* 40 */ + unsigned long long int policy; /* 41 */ + unsigned long long int delayacct_blkio_ticks; /* 42 */ + unsigned long long int guest_time; /* 43 */ + long long int 
cguest_time; /* 44 */ +}; + +int mx_proc_pid_stat_read(struct mx_proc_pid_stat *pps, char *fmt, ...); + +int mx_proc_pid_stat(struct mx_proc_pid_stat **pps, pid_t pid); +int mx_proc_pid_task_tid_stat(struct mx_proc_pid_stat **pps, pid_t pid, pid_t tid); + +void mx_proc_pid_stat_free_content(struct mx_proc_pid_stat *pps); + +int mx_proc_tree(struct mx_proc_tree **newtree); + +int mx_proc_tree_free(struct mx_proc_tree **tree); + +struct mx_proc_info *mx_proc_tree_proc_info(struct mx_proc_tree *tree, pid_t pid); + +#endif diff --git a/mx_util.c b/mx_util.c index 24088b2c..f783ed38 100644 --- a/mx_util.c +++ b/mx_util.c @@ -856,104 +856,6 @@ int mx_strscan_ll(char **str, long long int *to) return res; } -int mx_strscan_proc_pid_stat(char *str, struct proc_pid_stat *pps) -{ - size_t res = 0; - char *p; - char *s; - - pps->comm = NULL; - - s = str; - - res += mx_strscan_ll(&s, &(pps->pid)); - - p = strrchr(s, ')'); - if (!p) - return -(errno=EINVAL); - - *p = 0; - s++; - - pps->comm = mx_strdup_forever(s); - s = p + 2; - - pps->state = *s; - res += !(*(s+1) == ' '); - s += 2; - - res += mx_strscan_ll(&s, &(pps->ppid)); - res += mx_strscan_ll(&s, &(pps->pgrp)); - res += mx_strscan_ll(&s, &(pps->session)); - res += mx_strscan_ll(&s, &(pps->tty_nr)); - res += mx_strscan_ll(&s, &(pps->tpgid)); - res += mx_strscan_ull(&s, &(pps->flags)); - res += mx_strscan_ull(&s, &(pps->minflt)); - res += mx_strscan_ull(&s, &(pps->cminflt)); - res += mx_strscan_ull(&s, &(pps->majflt)); - res += mx_strscan_ull(&s, &(pps->cmajflt)); - res += mx_strscan_ull(&s, &(pps->utime)); - res += mx_strscan_ull(&s, &(pps->stime)); - res += mx_strscan_ll(&s, &(pps->cutime)); - res += mx_strscan_ll(&s, &(pps->cstime)); - res += mx_strscan_ll(&s, &(pps->priority)); - res += mx_strscan_ll(&s, &(pps->nice)); - res += mx_strscan_ll(&s, &(pps->num_threads)); - res += mx_strscan_ll(&s, &(pps->itrealvalue)); - res += mx_strscan_ull(&s, &(pps->starttime)); - res += mx_strscan_ull(&s, &(pps->vsize)); - res += 
mx_strscan_ll(&s, &(pps->rss)); - res += mx_strscan_ull(&s, &(pps->rsslim)); - res += mx_strscan_ull(&s, &(pps->startcode)); - res += mx_strscan_ull(&s, &(pps->endcode)); - res += mx_strscan_ull(&s, &(pps->startstack)); - res += mx_strscan_ull(&s, &(pps->kstkesp)); - res += mx_strscan_ull(&s, &(pps->kstkeip)); - res += mx_strscan_ull(&s, &(pps->signal)); - res += mx_strscan_ull(&s, &(pps->blocked)); - res += mx_strscan_ull(&s, &(pps->sigignore)); - res += mx_strscan_ull(&s, &(pps->sigcatch)); - res += mx_strscan_ull(&s, &(pps->wchan)); - res += mx_strscan_ull(&s, &(pps->nswap)); - res += mx_strscan_ull(&s, &(pps->cnswap)); - res += mx_strscan_ll(&s, &(pps->exit_signal)); - res += mx_strscan_ll(&s, &(pps->processor)); - res += mx_strscan_ull(&s, &(pps->rt_priority)); - res += mx_strscan_ull(&s, &(pps->policy)); - res += mx_strscan_ull(&s, &(pps->delayacct_blkio_ticks)); - res += mx_strscan_ull(&s, &(pps->guest_time)); - res += mx_strscan_ll(&s, &(pps->cguest_time)); - - if (res != 0) - return -(errno=EINVAL); - - return 0; -} - -int mx_proc_pid_stat(struct proc_pid_stat *pps, pid_t pid) -{ - _mx_cleanup_free_ char *fname = NULL; - _mx_cleanup_free_ char *line = NULL; - int res; - - mx_asprintf_forever(&fname, "/proc/%d/stat", pid); - - res = mx_read_first_line_from_file(fname, &line); - if (res < 0) - return res; - - res = mx_strscan_proc_pid_stat(line, pps); - if (res < 0) - return res; - - return 0; -} - -void mx_proc_pid_stat_free(struct proc_pid_stat *pps) -{ - mx_free_null(pps->comm); -} - int mx_sleep(unsigned int seconds) { if (seconds) diff --git a/mx_util.h b/mx_util.h index f2adb2ff..508476c6 100644 --- a/mx_util.h +++ b/mx_util.h @@ -12,53 +12,6 @@ #include "mx_log.h" -struct proc_pid_stat { - long long int pid; /* 1 */ - char *comm; /* 2 (comm) */ - char state; /* 3 "RSDZTW" */ - long long int ppid; /* 4 */ - long long int pgrp; /* 5 */ - long long int session; /* 6 */ - long long int tty_nr; /* 7 */ - long long int tpgid; /* 8 */ - unsigned long long 
int flags; /* 9 */ - unsigned long long int minflt; /* 10 */ - unsigned long long int cminflt; /* 11 */ - unsigned long long int majflt; /* 12 */ - unsigned long long int cmajflt; /* 13 */ - unsigned long long int utime; /* 14 */ - unsigned long long int stime; /* 15 */ - long long int cutime; /* 16 */ - long long int cstime; /* 17 */ - long long int priority; /* 18 */ - long long int nice; /* 19 */ - long long int num_threads; /* 20 */ - long long int itrealvalue; /* 21 */ - unsigned long long int starttime; /* 22 */ - unsigned long long int vsize; /* 23 */ - long long int rss; /* 24 */ - unsigned long long int rsslim; /* 25 */ - unsigned long long int startcode; /* 26 */ - unsigned long long int endcode; /* 27 */ - unsigned long long int startstack; /* 28 */ - unsigned long long int kstkesp; /* 29 */ - unsigned long long int kstkeip; /* 30 */ - unsigned long long int signal; /* 31 */ - unsigned long long int blocked; /* 32 */ - unsigned long long int sigignore; /* 33 */ - unsigned long long int sigcatch; /* 34 */ - unsigned long long int wchan; /* 35 */ - unsigned long long int nswap; /* 36 */ - unsigned long long int cnswap; /* 37 */ - long long int exit_signal; /* 38 */ - long long int processor; /* 39 */ - unsigned long long int rt_priority; /* 40 */ - unsigned long long int policy; /* 41 */ - unsigned long long int delayacct_blkio_ticks; /* 42 */ - unsigned long long int guest_time; /* 43 */ - long long int cguest_time; /* 44 */ -}; - #ifdef MX_NDEBUG # include # define mx_assert_return_minus_errno(test, eno) \ @@ -182,10 +135,6 @@ int mx_read_first_line_from_file(char *fname, char **line); int mx_strscan_ull(char **str, unsigned long long int *to); int mx_strscan_ll(char **str, long long int *to); -int mx_strscan_proc_pid_stat(char *str, struct proc_pid_stat *pps); - -int mx_proc_pid_stat(struct proc_pid_stat *pps, pid_t pid); -void mx_proc_pid_stat_free(struct proc_pid_stat *pps); int mx_sleep(unsigned int seconds); int mx_sleep_nofail(unsigned int 
seconds); diff --git a/mxq.h b/mxq.h index 22c5842c..54b073f9 100644 --- a/mxq.h +++ b/mxq.h @@ -17,7 +17,7 @@ #endif #ifndef MXQ_VERSIONDATE -# define MXQ_VERSIONDATE "2015" +# define MXQ_VERSIONDATE "today" #endif #ifndef MXQ_MYSQL_DEFAULT_FILE @@ -27,8 +27,22 @@ # define MXQ_MYSQL_DEFAULT_FILE_STR MXQ_MYSQL_DEFAULT_FILE #endif -#ifndef MXQ_MYSQL_DEFAULT_GROUP -# define MXQ_MYSQL_DEFAULT_GROUP program_invocation_short_name +#ifdef MXQ_DEVELOPMENT +# undef MXQ_MYSQL_DEFAULT_GROUP +# define MXQ_MYSQL_DEFAULT_GROUP MXQ_MYSQL_DEFAULT_GROUP_DEVELOPMENT +#else +# ifdef MXQ_TYPE_SERVER +# ifdef MXQ_MYSQL_DEFAULT_GROUP_SERVER +# define MXQ_MYSQL_DEFAULT_GROUP MXQ_MYSQL_DEFAULT_GROUP_SERVER +# endif +# else +# ifdef MXQ_MYSQL_DEFAULT_GROUP_CLIENT +# define MXQ_MYSQL_DEFAULT_GROUP MXQ_MYSQL_DEFAULT_GROUP_CLIENT +# endif +# endif +# ifndef MXQ_MYSQL_DEFAULT_GROUP +# define MXQ_MYSQL_DEFAULT_GROUP program_invocation_short_name +# endif #endif #define MXQ_MYSQL_DEFAULT_GROUP_STR MXQ_MYSQL_DEFAULT_GROUP @@ -36,7 +50,11 @@ static void mxq_print_generic_version(void) { printf( "%s - " MXQ_VERSIONFULL "\n" - " by Marius Tolzmann " MXQ_VERSIONDATE "\n" +#ifdef MXQ_DEVELOPMENT + "DEVELOPMENT VERSION: Do not use in production environments.\n" +#endif + " by Marius Tolzmann 2013-" MXQ_VERSIONDATE "\n" + " and Donald Buczek 2015-" MXQ_VERSIONDATE "\n" " Max Planck Institute for Molecular Genetics - Berlin Dahlem\n", program_invocation_short_name ); diff --git a/mxq_group.c b/mxq_group.c index 2887411d..e9c0ac4d 100644 --- a/mxq_group.c +++ b/mxq_group.c @@ -12,7 +12,7 @@ #include "mx_util.h" #include "mx_mysql.h" -#define GROUP_FIELDS_CNT 30 +#define GROUP_FIELDS_CNT 31 #define GROUP_FIELDS \ " group_id," \ " group_name," \ @@ -37,6 +37,7 @@ " group_jobs_unknown," \ " group_jobs_restarted," \ " group_slots_running," \ + " stats_max_sumrss," \ " stats_max_maxrss," \ " stats_max_utime_sec," \ " stats_max_stime_sec," \ @@ -85,6 +86,7 @@ static int bind_result_group_fields(struct 
mx_mysql_bind *result, struct mxq_gro res += mx_mysql_bind_var(result, idx++, uint64, &(g->group_slots_running)); + res += mx_mysql_bind_var(result, idx++, uint64, &(g->stats_max_sumrss)); res += mx_mysql_bind_var(result, idx++, uint64, &(g->stats_max_maxrss)); res += mx_mysql_bind_var(result, idx++, int64, &(g->stats_max_utime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, &(g->stats_max_stime.tv_sec)); diff --git a/mxq_group.h b/mxq_group.h index 46a352b2..7286a5a0 100644 --- a/mxq_group.h +++ b/mxq_group.h @@ -41,6 +41,7 @@ struct mxq_group { uint64_t group_slots_running; + uint64_t stats_max_sumrss; uint64_t stats_max_maxrss; struct timeval stats_max_utime; diff --git a/mxq_job.c b/mxq_job.c index 68fc709d..d4bbe21f 100644 --- a/mxq_job.c +++ b/mxq_job.c @@ -16,44 +16,39 @@ #include "mxq_group.h" #include "mxq_job.h" -#define JOB_FIELDS_CNT 35 +#define JOB_FIELDS_CNT 36 #define JOB_FIELDS \ " job_id, " \ " job_status, " \ " job_flags, " \ " job_priority, " \ " group_id, " \ - \ " job_workdir, " \ " job_argc, " \ " job_argv, " \ " job_stdout, " \ " job_stderr, " \ - \ " job_umask, " \ " host_submit, " \ " host_id, " \ " server_id, " \ " host_hostname, " \ - \ " host_pid, " \ " host_slots, " \ " UNIX_TIMESTAMP(date_submit) as date_submit, " \ " UNIX_TIMESTAMP(date_start) as date_start, " \ " UNIX_TIMESTAMP(date_end) as date_end, " \ - \ + " stats_max_sumrss, " \ " stats_status, " \ " stats_utime_sec, " \ " stats_utime_usec, " \ " stats_stime_sec, " \ " stats_stime_usec, " \ - \ " stats_real_sec, " \ " stats_real_usec, " \ " stats_maxrss, " \ " stats_minflt, " \ " stats_majflt, " \ - \ " stats_nswap, " \ " stats_inblock, " \ " stats_oublock, " \ @@ -73,37 +68,32 @@ static int bind_result_job_fields(struct mx_mysql_bind *result, struct mxq_job * res += mx_mysql_bind_var(result, idx++, uint64, &(j->job_flags)); res += mx_mysql_bind_var(result, idx++, uint16, &(j->job_priority)); res += mx_mysql_bind_var(result, idx++, uint64, &(j->group_id)); - res += 
mx_mysql_bind_var(result, idx++, string, &(j->job_workdir)); res += mx_mysql_bind_var(result, idx++, uint16, &(j->job_argc)); res += mx_mysql_bind_var(result, idx++, string, &(j->job_argv_str)); res += mx_mysql_bind_var(result, idx++, string, &(j->job_stdout)); res += mx_mysql_bind_var(result, idx++, string, &(j->job_stderr)); - res += mx_mysql_bind_var(result, idx++, uint32, &(j->job_umask)); res += mx_mysql_bind_var(result, idx++, string, &(j->host_submit)); res += mx_mysql_bind_var(result, idx++, string, &(j->host_id)); res += mx_mysql_bind_var(result, idx++, string, &(j->server_id)); res += mx_mysql_bind_var(result, idx++, string, &(j->host_hostname)); - res += mx_mysql_bind_var(result, idx++, uint32, &(j->host_pid)); res += mx_mysql_bind_var(result, idx++, uint32, &(j->host_slots)); res += mx_mysql_bind_var(result, idx++, int64, &(j->date_submit)); res += mx_mysql_bind_var(result, idx++, int64, &(j->date_start)); res += mx_mysql_bind_var(result, idx++, int64, &(j->date_end)); - + res += mx_mysql_bind_var(result, idx++, uint64, &(j->stats_max_sumrss)); res += mx_mysql_bind_var(result, idx++, int32, &(j->stats_status)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_utime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_utime.tv_usec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_stime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_stime.tv_usec)); - res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_realtime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_realtime.tv_usec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_maxrss)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_minflt)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_majflt)); - res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_nswap)); res += mx_mysql_bind_var(result, idx++, int64, 
&(j->stats_rusage.ru_inblock)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_oublock)); @@ -457,6 +447,7 @@ int mxq_set_job_status_running(struct mx_mysql *mysql, struct mxq_job *job) int mxq_set_job_status_exited(struct mx_mysql *mysql, struct mxq_job *job) { int res; + int idx; uint16_t newstatus; struct mx_mysql_bind param = {0}; @@ -485,6 +476,7 @@ int mxq_set_job_status_exited(struct mx_mysql *mysql, struct mxq_job *job) "UPDATE mxq_job SET" " job_status = ?," " date_end = NULL," + " stats_max_sumrss = ?, " " stats_status = ?, " " stats_utime_sec = ?, " " stats_utime_usec = ?, " @@ -506,30 +498,32 @@ int mxq_set_job_status_exited(struct mx_mysql *mysql, struct mxq_job *job) " AND server_id = ?" " AND host_pid = ?"; - res = mx_mysql_bind_init_param(&param, 20); + res = mx_mysql_bind_init_param(&param, 21); assert(res == 0); + idx = 0; res = 0; - res += mx_mysql_bind_var(&param, 0, uint16, &(newstatus)); - res += mx_mysql_bind_var(&param, 1, int32, &(job->stats_status)); - res += mx_mysql_bind_var(&param, 2, int64, &(job->stats_rusage.ru_utime.tv_sec)); - res += mx_mysql_bind_var(&param, 3, int64, &(job->stats_rusage.ru_utime.tv_usec)); - res += mx_mysql_bind_var(&param, 4, int64, &(job->stats_rusage.ru_stime.tv_sec)); - res += mx_mysql_bind_var(&param, 5, int64, &(job->stats_rusage.ru_stime.tv_usec)); - res += mx_mysql_bind_var(&param, 6, int64, &(job->stats_realtime.tv_sec)); - res += mx_mysql_bind_var(&param, 7, int64, &(job->stats_realtime.tv_usec)); - res += mx_mysql_bind_var(&param, 8, int64, &(job->stats_rusage.ru_maxrss)); - res += mx_mysql_bind_var(&param, 9, int64, &(job->stats_rusage.ru_minflt)); - res += mx_mysql_bind_var(&param, 10, int64, &(job->stats_rusage.ru_majflt)); - res += mx_mysql_bind_var(&param, 11, int64, &(job->stats_rusage.ru_nswap)); - res += mx_mysql_bind_var(&param, 12, int64, &(job->stats_rusage.ru_inblock)); - res += mx_mysql_bind_var(&param, 13, int64, &(job->stats_rusage.ru_oublock)); - res += mx_mysql_bind_var(&param, 14, int64, &(job->stats_rusage.ru_nvcsw)); - res += 
mx_mysql_bind_var(¶m, 15, int64, &(job->stats_rusage.ru_nivcsw)); - res += mx_mysql_bind_var(¶m, 16, uint64, &(job->job_id)); - res += mx_mysql_bind_var(¶m, 17, string, &(job->host_hostname)); - res += mx_mysql_bind_var(¶m, 18, string, &(job->server_id)); - res += mx_mysql_bind_var(¶m, 19, uint32, &(job->host_pid)); + res += mx_mysql_bind_var(¶m, idx++, uint16, &(newstatus)); + res += mx_mysql_bind_var(¶m, idx++, uint64, &(job->stats_max_sumrss)); + res += mx_mysql_bind_var(¶m, idx++, int32, &(job->stats_status)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_utime.tv_sec)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_utime.tv_usec)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_stime.tv_sec)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_stime.tv_usec)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_realtime.tv_sec)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_realtime.tv_usec)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_maxrss)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_minflt)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_majflt)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_nswap)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_inblock)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_oublock)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_nvcsw)); + res += mx_mysql_bind_var(¶m, idx++, int64, &(job->stats_rusage.ru_nivcsw)); + res += mx_mysql_bind_var(¶m, idx++, uint64, &(job->job_id)); + res += mx_mysql_bind_var(¶m, idx++, string, &(job->host_hostname)); + res += mx_mysql_bind_var(¶m, idx++, string, &(job->server_id)); + res += mx_mysql_bind_var(¶m, idx++, uint32, &(job->host_pid)); assert(res == 0); res = mx_mysql_do_statement_noresult_retry_on_fail(mysql, query, ¶m); @@ -676,6 
+670,7 @@ int mxq_load_job_from_group_for_server(struct mx_mysql *mysql, struct mxq_job *m } if(res == 1) { memcpy(mxqjob, &jobs[0], sizeof(*mxqjob)); + free(jobs); break; } diff --git a/mxq_job.h b/mxq_job.h index 42b2d81d..f29baf62 100644 --- a/mxq_job.h +++ b/mxq_job.h @@ -52,6 +52,8 @@ struct mxq_job { struct timeval stats_starttime; + uint64_t stats_max_sumrss; + int32_t stats_status; struct timeval stats_realtime; struct rusage stats_rusage; diff --git a/mxq_log.c b/mxq_log.c index d59ffac7..77b40b5f 100644 --- a/mxq_log.c +++ b/mxq_log.c @@ -8,6 +8,8 @@ #include #include "mx_log.h" +#include "mx_util.h" + #ifndef mx_free_null #include #define mx_free_null(a) do { free((a)); (a) = NULL; } while(0) @@ -42,7 +44,6 @@ static int timetag(char *buf, size_t size) int mx_log_print(char *msg, size_t len) { - int res; char timebuf[1024]; static char *lastmsg = NULL; @@ -61,8 +62,7 @@ int mx_log_print(char *msg, size_t len) return -(errno=EINVAL); if (lastmsg && lastlen == len) { - res = strcmp(msg, lastmsg); - if (res == 0) { + if (mx_streq(msg, lastmsg)) { cnt++; mx_free_null(msg); return 2; diff --git a/mxqd.c b/mxqd.c index 3a7fa6a6..167ed126 100644 --- a/mxqd.c +++ b/mxqd.c @@ -1,6 +1,8 @@ #define _GNU_SOURCE +#define MXQ_TYPE_SERVER + #include #include #include @@ -34,12 +36,10 @@ #include "mxq_group.h" #include "mxq_job.h" #include "mx_mysql.h" +#include "mx_proc.h" #include "mxqd.h" #include "mxq.h" -#define MYSQL_DEFAULT_FILE MXQ_MYSQL_DEFAULT_FILE -#define MYSQL_DEFAULT_GROUP "mxqd" - #ifndef MXQ_INITIAL_PATH # define MXQ_INITIAL_PATH "/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin" #endif @@ -48,6 +48,8 @@ # define MXQ_INITIAL_TMPDIR "/tmp" #endif +#define RUNNING_AS_ROOT (getuid() == 0) + volatile sig_atomic_t global_sigint_cnt=0; volatile sig_atomic_t global_sigterm_cnt=0; @@ -71,7 +73,11 @@ static void print_usage(void) "\n" " --pid-file default: create no pid file\n" " --daemonize default: run in foreground\n" +#ifdef MXQ_DEVELOPMENT + " 
--log default (in development): write no logfile\n" +#else " --no-log default: write a logfile\n" +#endif " --debug default: info log level\n" " --recover-only (recover from crash and exit)\n" "\n" @@ -281,7 +287,8 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) unsigned long arg_memory_total = 2048; unsigned long arg_memory_max = 0; int i; - struct proc_pid_stat pps = {0}; + + _mx_cleanup_free_ struct mx_proc_pid_stat *pps = NULL; struct mx_getopt_ctl optctl; struct mx_option opts[] = { @@ -289,6 +296,7 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) MX_OPTION_NO_ARG("version", 'V'), MX_OPTION_NO_ARG("daemonize", 1), MX_OPTION_NO_ARG("no-log", 3), + MX_OPTION_NO_ARG("log", 4), MX_OPTION_NO_ARG("debug", 5), MX_OPTION_NO_ARG("recover-only", 9), MX_OPTION_REQUIRED_ARG("pid-file", 2), @@ -307,16 +315,20 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) arg_server_id = "main"; arg_hostname = mx_hostname(); +#ifdef MXQ_DEVELOPMENT + arg_nolog = 1; +#endif + arg_initial_path = MXQ_INITIAL_PATH; arg_initial_tmpdir = MXQ_INITIAL_TMPDIR; arg_mysql_default_group = getenv("MXQ_MYSQL_DEFAULT_GROUP"); if (!arg_mysql_default_group) - arg_mysql_default_group = MYSQL_DEFAULT_GROUP; + arg_mysql_default_group = MXQ_MYSQL_DEFAULT_GROUP; arg_mysql_default_file = getenv("MXQ_MYSQL_DEFAULT_FILE"); if (!arg_mysql_default_file) - arg_mysql_default_file = MYSQL_DEFAULT_FILE; + arg_mysql_default_file = MXQ_MYSQL_DEFAULT_FILE; mx_getopt_init(&optctl, argc-1, &argv[1], opts); @@ -340,6 +352,10 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) arg_nolog = 1; break; + case 4: + arg_nolog = 0; + break; + case 5: mx_log_level_set(MX_LOG_DEBUG); break; @@ -417,6 +433,15 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) MX_GETOPT_FINISH(optctl, argc, argv); + if (!RUNNING_AS_ROOT) { +#if defined(MXQ_DEVELOPMENT) || defined(RUNASNORMALUSER) + mx_log_notice("Running mxqd as non-root user."); +#else 
+ mx_log_err("Running mxqd as non-root user is not supported at the moment."); + exit(EX_USAGE); +#endif + } + if (arg_daemonize && arg_nolog) { mx_log_err("Error while using conflicting options --daemonize and --no-log at once."); exit(EX_USAGE); @@ -485,15 +510,6 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) } } - if (getuid()) { -#ifdef RUNASNORMALUSER - mx_log_notice("Running mxqd as non-root user."); -#else - mx_log_err("Running mxqd as non-root user is not supported at the moment."); - exit(EX_USAGE); -#endif - } - res = mx_read_first_line_from_file("/proc/sys/kernel/random/boot_id", &str_bootid); assert(res == 36); assert(str_bootid); @@ -503,10 +519,11 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) res = mx_proc_pid_stat(&pps, getpid()); assert(res == 0); - server->starttime = pps.starttime; - mx_proc_pid_stat_free(&pps); + server->starttime = pps->starttime; + mx_proc_pid_stat_free_content(pps); mx_asprintf_forever(&server->host_id, "%s-%llx-%x", server->boot_id, server->starttime, getpid()); + mx_setenv_forever("MXQ_HOSTID", server->host_id); server->slots = arg_threads_total; res = cpuset_init(server); @@ -518,7 +535,7 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) server->memory_max_per_slot = arg_memory_max; /* if run as non-root use full memory by default for every job */ - if (!arg_memory_max && getuid() != 0) + if (!arg_memory_max && !RUNNING_AS_ROOT) server->memory_max_per_slot = arg_memory_total; server->memory_avg_per_slot = (long double)server->memory_total / (long double)server->slots; @@ -968,7 +985,7 @@ static int init_child_process(struct mxq_group_list *group, struct mxq_job *j) g->user_name, g->user_uid, g->group_id, j->job_id); } - if(getuid()==0) { + if(RUNNING_AS_ROOT) { res = initgroups(passwd->pw_name, g->user_gid); if (res == -1) { @@ -1126,7 +1143,7 @@ unsigned long start_job(struct mxq_group_list *group) group->group.user_name, group->group.user_uid, 
group->group.group_id, mxqjob.job_id); cpuset_init_job(&mxqjob.host_cpu_set,&server->cpu_set_available,&server->cpu_set_running,group->slots_per_job); - cpuset_log(" job assgined cpus: ",&mxqjob.host_cpu_set); + cpuset_log(" job assigned cpus: ",&mxqjob.host_cpu_set); mx_mysql_disconnect(server->mysql); @@ -1513,8 +1530,11 @@ int killall_over_time(struct mxq_server *server) assert(server); - /* limit killing to every >= 5 minutes */ - mx_within_rate_limit_or_return(5*60, 1); + if (!server->jobs_running) + return 0; + + /* limit killing to every >= 60 seconds */ + mx_within_rate_limit_or_return(60, 1); mx_log_info("killall_over_time: Sending signals to all jobs running longer than requested."); @@ -1575,6 +1595,69 @@ int killall_over_time(struct mxq_server *server) return 0; } +int killall_over_memory(struct mxq_server *server) +{ + struct mxq_user_list *user; + struct mxq_group_list *group; + struct mxq_job_list *job; + struct mx_proc_tree *pt = NULL; + struct mx_proc_info *pinfo; + long pagesize; + pid_t pid; + unsigned long long int memory; + int res; + + assert(server); + + if (!server->jobs_running) + return 0; + + /* limit killing to every >= 10 seconds */ + mx_within_rate_limit_or_return(10, 0); + + pagesize = sysconf(_SC_PAGESIZE); + if (pagesize <= 0) { /* sysconf() reports failure as -1, never 0 */ + mx_log_warning("killall_over_memory(): Can't get _SC_PAGESIZE.
Assuming 4096."); + pagesize = 4096; + } + + res = mx_proc_tree(&pt); + if (res < 0) { + mx_log_err("killall_over_memory(): Reading process tree failed: %m"); + return res; + } + + for (user=server->users; user; user=user->next) { + for (group=user->groups; group; group=group->next) { + for (job=group->jobs; job; job=job->next) { + pid = job->job.host_pid; + + pinfo = mx_proc_tree_proc_info(pt, pid); + if (!pinfo) { + mx_log_warning("killall_over_memory(): Can't find process with pid %d in process tree", pid); + continue; + } + + memory = pinfo->sum_rss * pagesize / 1024; + + if (job->max_sum_rss < memory) + job->max_sum_rss = memory; + + if (memory/1024 <= group->group.job_memory) + continue; + + mx_log_info("killall_over_memory(): used(%lluMiB) > requested(%lluMiB): Sending signal=TERM to job=%s(%d):%lu:%lu pid=%d", + memory/1024, group->group.job_memory, + group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid); + + kill(pid, SIGTERM); + } + } + } + mx_proc_tree_free(&pt); + return 0; +} + int killallcancelled(struct mxq_server *server, int sig, unsigned int pgrp) { struct mxq_user_list *user; @@ -1679,7 +1762,7 @@ int catchall(struct mxq_server *server) { g = &job->group->group; timersub(&now, &j->stats_starttime, &j->stats_realtime); - + j->stats_max_sumrss = job->max_sum_rss; j->stats_status = status; j->stats_rusage = rusage; @@ -1730,7 +1813,7 @@ int load_groups(struct mxq_server *server) { int total; int i; - if (getuid() == 0) + if (RUNNING_AS_ROOT) group_cnt = mxq_load_running_groups(server->mysql, &mxqgroups); else group_cnt = mxq_load_running_groups_for_user(server->mysql, &mxqgroups, getuid()); @@ -1820,8 +1903,12 @@ int main(int argc, char *argv[]) } mx_log_info("mxqd - " MXQ_VERSIONFULL); - mx_log_info(" by Marius Tolzmann " MXQ_VERSIONDATE); + mx_log_info(" by Marius Tolzmann 2013-" MXQ_VERSIONDATE); + mx_log_info(" and Donald Buczek 2015-" MXQ_VERSIONDATE); mx_log_info(" Max Planck Institute for Molecular
Genetics - Berlin Dahlem"); +#ifdef MXQ_DEVELOPMENT + mx_log_warning("DEVELOPMENT VERSION: Do not use in production environments."); +#endif mx_log_info("hostname=%s server_id=%s :: MXQ server started.", server.hostname, server.server_id); mx_log_info(" host_id=%s", server.host_id); mx_log_info("slots=%lu memory_total=%lu memory_avg_per_slot=%.0Lf memory_max_per_slot=%ld :: server initialized.", @@ -1871,6 +1958,7 @@ int main(int argc, char *argv[]) killallcancelled(&server, SIGTERM, 0); killallcancelled(&server, SIGINT, 0); killall_over_time(&server); + killall_over_memory(&server); if (!server.group_cnt) { assert(!server.jobs_running); @@ -1918,6 +2006,7 @@ int main(int argc, char *argv[]) killallcancelled(&server, SIGTERM, 0); killallcancelled(&server, SIGINT, 0); killall_over_time(&server); + killall_over_memory(&server); mx_log_info("jobs_running=%lu global_sigint_cnt=%d global_sigterm_cnt=%d : Exiting. Wating for jobs to finish. Sleeping for a while.", server.jobs_running, global_sigint_cnt, global_sigterm_cnt); diff --git a/mxqd.h b/mxqd.h index 4de72d44..4bc1cd0a 100644 --- a/mxqd.h +++ b/mxqd.h @@ -10,6 +10,8 @@ struct mxq_job_list { struct mxq_job job; + unsigned long long int max_sum_rss; + pid_t pid; }; diff --git a/mxqdump.c b/mxqdump.c index 2cac9c13..8b0b9665 100644 --- a/mxqdump.c +++ b/mxqdump.c @@ -128,6 +128,7 @@ static int print_group(struct mxq_group *g) " max_utime=%lu" " max_real=%lu" " max_memory=%lukiB" + " max_rss=%lukiB" " wait_sec=%lu" " run_sec=%lu" " idle_sec=%lu" @@ -150,10 +151,11 @@ static int print_group(struct mxq_group *g) g->job_threads, g->job_memory*1024, g->job_time*60, - (100UL*(uint64_t)g->stats_max_maxrss/1024UL/g->job_memory), + (100UL*(uint64_t)g->stats_max_sumrss/1024UL/g->job_memory), (100UL*(uint64_t)g->stats_max_real.tv_sec/60UL/g->job_time), g->stats_max_utime.tv_sec, g->stats_max_real.tv_sec, + g->stats_max_sumrss, g->stats_max_maxrss, g->stats_wait_sec, g->stats_run_sec, @@ -219,6 +221,7 @@ static int 
print_job(struct mxq_group *g, struct mxq_job *j) " runtime_requested=%us" " time_load=%lu%%" " memory_requested=%lukiB" + " max_memory=%lukiB" " max_rss=%lukiB" " memory_load=%lu%%" " threads=%d" @@ -244,8 +247,9 @@ static int print_job(struct mxq_group *g, struct mxq_job *j) g->job_time*60, (100UL*(run_sec)/60UL/g->job_time), g->job_memory*1024, + j->stats_max_sumrss, j->stats_rusage.ru_maxrss, - (100UL*j->stats_rusage.ru_maxrss/1024UL/g->job_memory), + (100UL*j->stats_max_sumrss/1024UL/g->job_memory), g->job_threads, j->host_slots, mxq_job_status_to_name(j->job_status), diff --git a/mxqps.c b/mxqps.c new file mode 100644 index 00000000..5088edc0 --- /dev/null +++ b/mxqps.c @@ -0,0 +1,94 @@ + +#include +#include +#include +#include +#include +#include + +#include "mx_util.h" +#include "mx_log.h" +#include "mx_proc.h" + + +int filter(const struct dirent *d) +{ + if (!isdigit(d->d_name[0])) + return 0; + + return 1; +} + +#define MX_PROC_TREE_NODE_IS_KERNEL_THREAD(x) ((x)->pinfo.pstat->ppid == 0 && (x)->pinfo.sum_rss == 0) + +int mx_proc_tree_node_print_debug(struct mx_proc_tree_node *ptn, int lvl) +{ + assert(ptn); + + struct mx_proc_tree_node *current; + + current = ptn; + + long pagesize; + + pagesize = sysconf(_SC_PAGESIZE); + assert(pagesize); + + for (current = ptn; current; current=current->next) { + if (MX_PROC_TREE_NODE_IS_KERNEL_THREAD(current)) + continue; + + printf("%7lld %7lld %7lld %7lld %15lld %15lld %7lld", + current->pinfo.pstat->pid, + current->pinfo.pstat->ppid, + current->pinfo.pstat->pgrp, + current->pinfo.pstat->session, + current->pinfo.pstat->rss*pagesize/1024, + current->pinfo.sum_rss*pagesize/1024, + current->pinfo.pstat->num_threads); + + if (lvl>0) + printf("%*s", lvl*4, "\\_"); + assert(current->pinfo.pstat); + printf(" %s\n", current->pinfo.pstat->comm); + + if (!current->childs) + continue; + + mx_proc_tree_node_print_debug(current->childs, lvl+(current->parent != NULL)); + } + + return 0; +} + +int mx_proc_tree_print_debug(struct 
mx_proc_tree *pt) +{ + assert(pt); + printf("%7s %7s %7s %7s %15s %15s %7s COMMAND\n", + "PID", + "PPID", + "PGRP", + "SESSION", + "RSS", + "SUMRSS", + "THREADS"); + mx_proc_tree_node_print_debug(pt->root, 0); + return 0; +} + +int main(void) +{ + int res; + struct mx_proc_tree *pt = NULL; + + res = mx_proc_tree(&pt); + assert(res == 0); + + mx_proc_tree_print_debug(pt); + + mx_proc_tree_free(&pt); + + + return 0; + +} diff --git a/mysql/alter_tables_0.17.0.sql b/mysql/alter_tables_0.17.0.sql new file mode 100644 index 00000000..b9daa722 --- /dev/null +++ b/mysql/alter_tables_0.17.0.sql @@ -0,0 +1,11 @@ +ALTER TABLE mxq_group + ADD COLUMN + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0 + AFTER + group_date_end; + +ALTER TABLE mxq_job + ADD COLUMN + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0 + AFTER + job_id_first; diff --git a/mysql/create_tables.sql b/mysql/create_tables.sql index ce129e4f..f2566cc6 100644 --- a/mysql/create_tables.sql +++ b/mysql/create_tables.sql @@ -1,5 +1,3 @@ - -DROP TABLE mxq_group; CREATE TABLE IF NOT EXISTS mxq_group ( group_id INT8 UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, group_name VARCHAR(511) NOT NULL DEFAULT 'default', @@ -36,6 +34,8 @@ CREATE TABLE IF NOT EXISTS mxq_group ( group_date_end TIMESTAMP NOT NULL DEFAULT 0, + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0, + stats_max_maxrss INT8 UNSIGNED NOT NULL DEFAULT 0, stats_max_utime_sec INT8 UNSIGNED NOT NULL DEFAULT 0, stats_max_stime_sec INT8 UNSIGNED NOT NULL DEFAULT 0, @@ -62,7 +62,6 @@ CREATE TABLE IF NOT EXISTS mxq_group ( INDEX(group_name) ); -DROP TABLE mxq_job; CREATE TABLE IF NOT EXISTS mxq_job ( job_id INT8 UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, job_status INT2 UNSIGNED NOT NULL DEFAULT 0, @@ -97,6 +96,8 @@ CREATE TABLE IF NOT EXISTS mxq_job ( job_id_old INT8 UNSIGNED NULL DEFAULT NULL, job_id_first INT8 UNSIGNED NULL DEFAULT NULL, + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0, + stats_status INT4 UNSIGNED NOT NULL DEFAULT 0, 
stats_utime_sec INT8 UNSIGNED NOT NULL DEFAULT 0, @@ -123,7 +124,6 @@ CREATE TABLE IF NOT EXISTS mxq_job ( INDEX (server_id(767)) ); -DROP TABLE mxq_server; CREATE TABLE IF NOT EXISTS mxq_server ( host_id INT4 UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, host_hostname VARCHAR(511) NOT NULL DEFAULT 'localhost', @@ -142,136 +142,3 @@ CREATE TABLE IF NOT EXISTS mxq_server ( server_start TIMESTAMP DEFAULT 0, server_stop TIMESTAMP DEFAULT 0 ); - - - -LOCK TABLES mxq_job WRITE, mxq_group WRITE; -DELIMITER | -DROP TRIGGER mxq_add_group| -CREATE TRIGGER mxq_add_group BEFORE INSERT ON mxq_group - FOR EACH ROW BEGIN - SET NEW.group_mtime = NOW(); - - IF (NEW.group_jobs_running = 0 AND NEW.group_jobs_inq = 0) THEN - SET NEW.group_date_end = NEW.group_mtime; - ELSEIF (NEW.group_jobs_running > 0 OR NEW.group_jobs_inq > 0) THEN - SET NEW.group_date_end = 0; - END IF; - END; -| -DROP TRIGGER mxq_update_group| -CREATE TRIGGER mxq_update_group BEFORE UPDATE ON mxq_group - FOR EACH ROW BEGIN - SET NEW.group_mtime = NOW(); - - IF OLD.group_jobs_inq > 0 AND OLD.group_jobs_running = 0 THEN - SET NEW.stats_wait_sec = OLD.stats_wait_sec + (UNIX_TIMESTAMP(NEW.group_mtime) - UNIX_TIMESTAMP(OLD.group_mtime)); - ELSEIF OLD.group_jobs_running > 0 THEN - SET NEW.stats_run_sec = OLD.stats_run_sec + (UNIX_TIMESTAMP(NEW.group_mtime) - UNIX_TIMESTAMP(OLD.group_mtime)); - END IF; - - IF (NEW.group_jobs_running = 0 AND NEW.group_jobs_inq = 0) AND - (OLD.group_jobs_running > 0 OR OLD.group_jobs_inq > 0) THEN - SET NEW.group_date_end = NEW.group_mtime; - ELSEIF (OLD.group_jobs_running = 0 AND OLD.group_jobs_inq = 0) AND - (NEW.group_jobs_running > 0 OR NEW.group_jobs_inq > 0) THEN - SET NEW.stats_idle_sec = OLD.stats_idle_sec + (UNIX_TIMESTAMP(NEW.group_mtime) - UNIX_TIMESTAMP(OLD.group_date_end)); - SET NEW.group_date_end = 0; - END IF; - END; -| -DROP TRIGGER mxq_add_job| -CREATE TRIGGER mxq_add_job AFTER INSERT ON mxq_job - FOR EACH ROW BEGIN - UPDATE mxq_group SET - group_jobs=group_jobs+1, - 
group_jobs_inq=group_jobs_inq+1, - group_mtime=NULL - WHERE group_id=NEW.group_id; - END; -| -DROP TRIGGER mxq_update_job| -CREATE TRIGGER mxq_update_job BEFORE UPDATE ON mxq_job - FOR EACH ROW BEGIN - IF NEW.job_status != OLD.job_status THEN - IF NEW.job_status IN (150, 200) AND OLD.job_status IN (0, 100) THEN - UPDATE mxq_group SET - group_jobs_inq=group_jobs_inq-1, - group_jobs_running=group_jobs_running+1, - group_slots_running=group_slots_running+NEW.host_slots, - group_mtime=NULL - WHERE group_id=NEW.group_id; - ELSEIF NEW.job_status = 200 AND OLD.job_status = 150 THEN - UPDATE mxq_group SET - group_slots_running=group_slots_running-OLD.host_slots+NEW.host_slots, - group_mtime=NULL - WHERE group_id=NEW.group_id; - ELSEIF NEW.job_status IN (400, 750) AND OLD.job_status IN (150, 200, 250, 300, 350, 399) THEN - UPDATE mxq_group SET - group_slots_running=group_slots_running-NEW.host_slots, - group_jobs_running=group_jobs_running-1, - group_jobs_failed=group_jobs_failed+1, - stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), - stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), - stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), - stats_max_real_sec=GREATEST(stats_max_real_sec, NEW.stats_real_sec), - stats_total_utime_sec=stats_total_utime_sec+NEW.stats_utime_sec, - stats_total_stime_sec=stats_total_stime_sec+NEW.stats_stime_sec, - stats_total_real_sec=stats_total_real_sec+NEW.stats_real_sec, - group_mtime=NULL - WHERE group_id=NEW.group_id; - ELSEIF NEW.job_status = 990 AND OLD.job_status IN (0, 100, 989) THEN - SET NEW.date_start = NOW(); - SET NEW.date_end = NEW.date_start; - UPDATE mxq_group SET - group_jobs_inq=group_jobs_inq-1, - group_jobs_cancelled=group_jobs_cancelled+1, - group_mtime=NULL - WHERE group_id=NEW.group_id; - ELSEIF NEW.job_status = 999 AND OLD.job_status IN (150, 200, 250, 399) THEN - UPDATE mxq_group SET - group_slots_running=group_slots_running-NEW.host_slots, - 
group_jobs_running=group_jobs_running-1, - group_jobs_unknown=group_jobs_unknown+1, - group_mtime=NULL - WHERE group_id=NEW.group_id; - ELSEIF NEW.job_status = 999 AND OLD.job_status IN (400, 750, 755) THEN - UPDATE mxq_group SET - group_jobs_failed=group_jobs_failed-1, - group_jobs_unknown=group_jobs_unknown+1, - group_mtime=NULL - WHERE group_id=NEW.group_id; - ELSEIF NEW.job_status = 1000 AND OLD.job_status IN (150, 200, 250, 300, 350, 399) THEN - UPDATE mxq_group SET - group_slots_running=group_slots_running-NEW.host_slots, - group_jobs_running=group_jobs_running-1, - group_jobs_finished=group_jobs_finished+1, - stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), - stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), - stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), - stats_max_real_sec=GREATEST(stats_max_real_sec, NEW.stats_real_sec), - stats_total_utime_sec=stats_total_utime_sec+NEW.stats_utime_sec, - stats_total_stime_sec=stats_total_stime_sec+NEW.stats_stime_sec, - stats_total_real_sec=stats_total_real_sec+NEW.stats_real_sec, - stats_total_utime_sec_finished=stats_total_utime_sec_finished+NEW.stats_utime_sec, - stats_total_stime_sec_finished=stats_total_stime_sec_finished+NEW.stats_stime_sec, - stats_total_real_sec_finished=stats_total_real_sec_finished+NEW.stats_real_sec, - group_mtime=NULL - WHERE group_id=NEW.group_id; - ELSEIF NEW.job_status NOT IN (399, 755, 989, 990) THEN - UPDATE mxq_group SET - stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), - stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), - stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), - stats_max_real_sec=GREATEST(stats_max_real_sec, NEW.stats_real_sec), - stats_total_utime_sec=stats_total_utime_sec+NEW.stats_utime_sec, - stats_total_stime_sec=stats_total_stime_sec+NEW.stats_stime_sec, - stats_total_real_sec=stats_total_real_sec+NEW.stats_real_sec, - group_mtime=NULL - WHERE 
group_id=NEW.group_id; - END IF; - END IF; - END; -| -DELIMITER ; -UNLOCK TABLES; diff --git a/mysql/create_trigger.sql b/mysql/create_trigger.sql new file mode 100644 index 00000000..c4269acf --- /dev/null +++ b/mysql/create_trigger.sql @@ -0,0 +1,133 @@ +LOCK TABLES mxq_job WRITE, mxq_group WRITE; +DELIMITER | +DROP TRIGGER IF EXISTS mxq_add_group| +CREATE TRIGGER mxq_add_group BEFORE INSERT ON mxq_group + FOR EACH ROW BEGIN + SET NEW.group_mtime = NOW(); + + IF (NEW.group_jobs_running = 0 AND NEW.group_jobs_inq = 0) THEN + SET NEW.group_date_end = NEW.group_mtime; + ELSEIF (NEW.group_jobs_running > 0 OR NEW.group_jobs_inq > 0) THEN + SET NEW.group_date_end = 0; + END IF; + END; +| +DROP TRIGGER IF EXISTS mxq_update_group| +CREATE TRIGGER mxq_update_group BEFORE UPDATE ON mxq_group + FOR EACH ROW BEGIN + SET NEW.group_mtime = NOW(); + + IF OLD.group_jobs_inq > 0 AND OLD.group_jobs_running = 0 THEN + SET NEW.stats_wait_sec = OLD.stats_wait_sec + (UNIX_TIMESTAMP(NEW.group_mtime) - UNIX_TIMESTAMP(OLD.group_mtime)); + ELSEIF OLD.group_jobs_running > 0 THEN + SET NEW.stats_run_sec = OLD.stats_run_sec + (UNIX_TIMESTAMP(NEW.group_mtime) - UNIX_TIMESTAMP(OLD.group_mtime)); + END IF; + + IF (NEW.group_jobs_running = 0 AND NEW.group_jobs_inq = 0) AND + (OLD.group_jobs_running > 0 OR OLD.group_jobs_inq > 0) THEN + SET NEW.group_date_end = NEW.group_mtime; + ELSEIF (OLD.group_jobs_running = 0 AND OLD.group_jobs_inq = 0) AND + (NEW.group_jobs_running > 0 OR NEW.group_jobs_inq > 0) THEN + SET NEW.stats_idle_sec = OLD.stats_idle_sec + (UNIX_TIMESTAMP(NEW.group_mtime) - UNIX_TIMESTAMP(OLD.group_date_end)); + SET NEW.group_date_end = 0; + END IF; + END; +| +DROP TRIGGER IF EXISTS mxq_add_job| +CREATE TRIGGER mxq_add_job AFTER INSERT ON mxq_job + FOR EACH ROW BEGIN + UPDATE mxq_group SET + group_jobs=group_jobs+1, + group_jobs_inq=group_jobs_inq+1, + group_mtime=NULL + WHERE group_id=NEW.group_id; + END; +| +DROP TRIGGER IF EXISTS mxq_update_job| +CREATE TRIGGER mxq_update_job 
BEFORE UPDATE ON mxq_job + FOR EACH ROW BEGIN + IF NEW.job_status != OLD.job_status THEN + IF NEW.job_status IN (150, 200) AND OLD.job_status IN (0, 100) THEN + UPDATE mxq_group SET + group_jobs_inq=group_jobs_inq-1, + group_jobs_running=group_jobs_running+1, + group_slots_running=group_slots_running+NEW.host_slots, + group_mtime=NULL + WHERE group_id=NEW.group_id; + ELSEIF NEW.job_status = 200 AND OLD.job_status = 150 THEN + UPDATE mxq_group SET + group_slots_running=group_slots_running-OLD.host_slots+NEW.host_slots, + group_mtime=NULL + WHERE group_id=NEW.group_id; + ELSEIF NEW.job_status IN (400, 750) AND OLD.job_status IN (150, 200, 250, 300, 350, 399) THEN + UPDATE mxq_group SET + group_slots_running=group_slots_running-NEW.host_slots, + group_jobs_running=group_jobs_running-1, + group_jobs_failed=group_jobs_failed+1, + stats_max_sumrss=GREATEST(stats_max_sumrss, NEW.stats_max_sumrss), + stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), + stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), + stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), + stats_max_real_sec=GREATEST(stats_max_real_sec, NEW.stats_real_sec), + stats_total_utime_sec=stats_total_utime_sec+NEW.stats_utime_sec, + stats_total_stime_sec=stats_total_stime_sec+NEW.stats_stime_sec, + stats_total_real_sec=stats_total_real_sec+NEW.stats_real_sec, + group_mtime=NULL + WHERE group_id=NEW.group_id; + ELSEIF NEW.job_status = 990 AND OLD.job_status IN (0, 100, 989) THEN + SET NEW.date_start = NOW(); + SET NEW.date_end = NEW.date_start; + UPDATE mxq_group SET + group_jobs_inq=group_jobs_inq-1, + group_jobs_cancelled=group_jobs_cancelled+1, + group_mtime=NULL + WHERE group_id=NEW.group_id; + ELSEIF NEW.job_status = 999 AND OLD.job_status IN (150, 200, 250, 399) THEN + UPDATE mxq_group SET + group_slots_running=group_slots_running-NEW.host_slots, + group_jobs_running=group_jobs_running-1, + group_jobs_unknown=group_jobs_unknown+1, + group_mtime=NULL + 
WHERE group_id=NEW.group_id; + ELSEIF NEW.job_status = 999 AND OLD.job_status IN (400, 750, 755) THEN + UPDATE mxq_group SET + group_jobs_failed=group_jobs_failed-1, + group_jobs_unknown=group_jobs_unknown+1, + group_mtime=NULL + WHERE group_id=NEW.group_id; + ELSEIF NEW.job_status = 1000 AND OLD.job_status IN (150, 200, 250, 300, 350, 399) THEN + UPDATE mxq_group SET + group_slots_running=group_slots_running-NEW.host_slots, + group_jobs_running=group_jobs_running-1, + group_jobs_finished=group_jobs_finished+1, + stats_max_sumrss=GREATEST(stats_max_sumrss, NEW.stats_max_sumrss), + stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), + stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), + stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), + stats_max_real_sec=GREATEST(stats_max_real_sec, NEW.stats_real_sec), + stats_total_utime_sec=stats_total_utime_sec+NEW.stats_utime_sec, + stats_total_stime_sec=stats_total_stime_sec+NEW.stats_stime_sec, + stats_total_real_sec=stats_total_real_sec+NEW.stats_real_sec, + stats_total_utime_sec_finished=stats_total_utime_sec_finished+NEW.stats_utime_sec, + stats_total_stime_sec_finished=stats_total_stime_sec_finished+NEW.stats_stime_sec, + stats_total_real_sec_finished=stats_total_real_sec_finished+NEW.stats_real_sec, + group_mtime=NULL + WHERE group_id=NEW.group_id; + ELSEIF NEW.job_status NOT IN (399, 755, 989, 990) THEN + UPDATE mxq_group SET + stats_max_sumrss=GREATEST(stats_max_sumrss, NEW.stats_max_sumrss), + stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), + stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), + stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), + stats_max_real_sec=GREATEST(stats_max_real_sec, NEW.stats_real_sec), + stats_total_utime_sec=stats_total_utime_sec+NEW.stats_utime_sec, + stats_total_stime_sec=stats_total_stime_sec+NEW.stats_stime_sec, + 
stats_total_real_sec=stats_total_real_sec+NEW.stats_real_sec, + group_mtime=NULL + WHERE group_id=NEW.group_id; + END IF; + END IF; + END; +| +DELIMITER ; +UNLOCK TABLES; diff --git a/mysql/drop_tables.sql b/mysql/drop_tables.sql new file mode 100644 index 00000000..5ff48f17 --- /dev/null +++ b/mysql/drop_tables.sql @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS mxq_group; +DROP TABLE IF EXISTS mxq_job; +DROP TABLE IF EXISTS mxq_server; diff --git a/test_mx_util.c b/test_mx_util.c index b290490a..06cfe728 100644 --- a/test_mx_util.c +++ b/test_mx_util.c @@ -8,6 +8,7 @@ #include #include "mx_util.h" +#include "mx_proc.h" static void test_mx_strskipwhitespaces(void) { @@ -305,8 +306,7 @@ static void test_mx_strscan(void) unsigned long long int ull; long long int ll; _mx_cleanup_free_ char *line = NULL; - struct proc_pid_stat pps = {0}; - struct proc_pid_stat pps2 = {0}; + _mx_cleanup_free_ struct mx_proc_pid_stat *pps = NULL; assert(s = strdup("123 456 -789 246 abc")); str = s; @@ -340,20 +340,12 @@ static void test_mx_strscan(void) assert(mx_streq(str, "")); assert(mx_streq(s, "123")); - assert(mx_read_first_line_from_file("/proc/self/stat", &line) > 0); - assert(mx_strscan_proc_pid_stat(line, &pps) == 0); - assert(pps.pid == getpid()); - assert(pps.ppid == getppid()); - assert(pps.state == 'R'); - assert(mx_streq(pps.comm, program_invocation_short_name) || mx_streq(pps.comm, "memcheck-amd64-")); - mx_proc_pid_stat_free(&pps); - - assert(mx_proc_pid_stat(&pps2, getpid()) == 0); - assert(pps2.pid == getpid()); - assert(pps2.ppid == getppid()); - assert(pps2.state == 'R'); - assert(mx_streq(pps2.comm, program_invocation_short_name) || mx_streq(pps2.comm, "memcheck-amd64-")); - mx_proc_pid_stat_free(&pps2); + assert(mx_proc_pid_stat(&pps, getpid()) == 0); + assert(pps->pid == getpid()); + assert(pps->ppid == getppid()); + assert(pps->state == 'R'); + assert(mx_streq(pps->comm, program_invocation_short_name) || mx_streq(pps->comm, "memcheck-amd64-")); + 
mx_proc_pid_stat_free_content(pps); } static void test_mx_strvec() { diff --git a/web/pages/mxq/mxq.in b/web/pages/mxq/mxq.in index 6a4f8b2b..e938bbec 100755 --- a/web/pages/mxq/mxq.in +++ b/web/pages/mxq/mxq.in @@ -269,6 +269,7 @@ group_mtime : $o{group_mtime} group_date_end : $o{group_date_end} +stats_max_sumrss : $o{stats_max_sumrss} kiB stats_max_maxrss : $o{stats_max_maxrss} stats_max_utime_sec : $o{stats_max_utime_sec} stats_max_stime_sec : $o{stats_max_stime_sec} @@ -311,7 +312,7 @@ sub job { $dbh or db_init(); - my $sth=$dbh->prepare('SELECT *,timestampdiff(MINUTE,date_start,now()) as t FROM mxq_job WHERE job_id=? LIMIT 1',undef); + my $sth=$dbh->prepare('SELECT *,timestampdiff(MINUTE,date_start,now()) as t,timestampdiff(MINUTE,date_start,date_end) as t2 FROM mxq_job WHERE job_id=? LIMIT 1',undef); $sth->execute($job_id); my %o=%{$sth->fetchrow_hashref('NAME_lc')}; @@ -325,8 +326,8 @@ sub job { my $job_stdout=escapeHTML($o{job_stdout}); my $job_stderr=escapeHTML($o{job_stderr}); - - my $ago = defined $o{t} ? "($o{t} minutes ago)" : ''; + my $ago = $job_status_text eq 'RUNNING' && defined $o{t} ? "($o{t} minutes ago)" : ''; + my $rt = defined $o{t2} ?
"($o{t2} minutes runtime)" : ''; defined $_ or $_='<null>' for values %o; @@ -358,12 +359,14 @@ host_slots : $o{host_slots} date_submit : $o{date_submit} date_start : $o{date_start} $ago -date_end : $o{date_end} +date_end : $o{date_end} $rt job_id_new : $o{job_id_new} job_id_old : $o{job_id_old} job_id_first : $o{job_id_first} +stats_max_sumrss : $o{stats_max_sumrss} kiB + stats_status : $o{stats_status} stats_utime_sec : $o{stats_utime_sec} @@ -394,17 +397,17 @@ sub group_table_rows { $out.=$q->Tr($q->th($head)); while (my $row=$sth->fetchrow_arrayref()) { - my ($group_id,$group_name,$user_name,$group_mtime,$group_status, + my ($group_id,$group_name,$job_threads,$user_name,$group_mtime,$group_status, $group_jobs,$group_jobs_inq,$group_jobs_running,$group_jobs_finished,$group_jobs_failed,$group_jobs_cancelled,$group_jobs_unknown )=@$row; $out.=$q->Tr( $q->td({class=>'number'},$q->a({href=>selfurl("/group/$group_id")},$group_id)), $q->td($group_name), + $q->td({class=>'number'},$job_threads), $q->td($user_name), $q->td($group_mtime), $q->td(group_status($group_status)), - $q->td({class=>'number'},$group_jobs), $q->td({class=>'number'},$group_jobs_inq), $q->td({class=>'number'},$group_jobs_running), @@ -426,11 +429,11 @@ sub group_table { my $out; my @cols=qw( - group_id group_name user_name group_mtime group_status + group_id group_name job_threads user_name group_mtime group_status group_jobs group_jobs_inq group_jobs_running group_jobs_finished group_jobs_failed group_jobs_cancelled group_jobs_unknown ); my @head=qw( - group_id group_name user_name group_mtime group_status + group_id group_name threads user_name group_mtime group_status jobs inq running finished failed cancelled unknown ); @@ -444,6 +447,7 @@ sub group_table { @cols=qw( group_id group_name + job_threads user_name group_date_end group_status @@ -457,6 +461,7 @@ sub group_table { @head=qw( group_id group_name + threads user_name date_end group_status