diff --git a/mxq_group.c b/mxq_group.c index 2887411..e9c0ac4 100644 --- a/mxq_group.c +++ b/mxq_group.c @@ -12,7 +12,7 @@ #include "mx_util.h" #include "mx_mysql.h" -#define GROUP_FIELDS_CNT 30 +#define GROUP_FIELDS_CNT 31 #define GROUP_FIELDS \ " group_id," \ " group_name," \ @@ -37,6 +37,7 @@ " group_jobs_unknown," \ " group_jobs_restarted," \ " group_slots_running," \ + " stats_max_sumrss," \ " stats_max_maxrss," \ " stats_max_utime_sec," \ " stats_max_stime_sec," \ @@ -85,6 +86,7 @@ static int bind_result_group_fields(struct mx_mysql_bind *result, struct mxq_gro res += mx_mysql_bind_var(result, idx++, uint64, &(g->group_slots_running)); + res += mx_mysql_bind_var(result, idx++, uint64, &(g->stats_max_sumrss)); res += mx_mysql_bind_var(result, idx++, uint64, &(g->stats_max_maxrss)); res += mx_mysql_bind_var(result, idx++, int64, &(g->stats_max_utime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, &(g->stats_max_stime.tv_sec)); diff --git a/mxq_group.h b/mxq_group.h index 46a352b..7286a5a 100644 --- a/mxq_group.h +++ b/mxq_group.h @@ -41,6 +41,7 @@ struct mxq_group { uint64_t group_slots_running; + uint64_t stats_max_sumrss; uint64_t stats_max_maxrss; struct timeval stats_max_utime; diff --git a/mxq_job.c b/mxq_job.c index 68fc709..ebde474 100644 --- a/mxq_job.c +++ b/mxq_job.c @@ -16,44 +16,39 @@ #include "mxq_group.h" #include "mxq_job.h" -#define JOB_FIELDS_CNT 35 +#define JOB_FIELDS_CNT 36 #define JOB_FIELDS \ " job_id, " \ " job_status, " \ " job_flags, " \ " job_priority, " \ " group_id, " \ - \ " job_workdir, " \ " job_argc, " \ " job_argv, " \ " job_stdout, " \ " job_stderr, " \ - \ " job_umask, " \ " host_submit, " \ " host_id, " \ " server_id, " \ " host_hostname, " \ - \ " host_pid, " \ " host_slots, " \ " UNIX_TIMESTAMP(date_submit) as date_submit, " \ " UNIX_TIMESTAMP(date_start) as date_start, " \ " UNIX_TIMESTAMP(date_end) as date_end, " \ - \ + " stats_max_sumrss, " \ " stats_status, " \ " stats_utime_sec, " \ " stats_utime_usec, " \ " 
stats_stime_sec, " \ " stats_stime_usec, " \ - \ " stats_real_sec, " \ " stats_real_usec, " \ " stats_maxrss, " \ " stats_minflt, " \ " stats_majflt, " \ - \ " stats_nswap, " \ " stats_inblock, " \ " stats_oublock, " \ @@ -73,37 +68,32 @@ static int bind_result_job_fields(struct mx_mysql_bind *result, struct mxq_job * res += mx_mysql_bind_var(result, idx++, uint64, &(j->job_flags)); res += mx_mysql_bind_var(result, idx++, uint16, &(j->job_priority)); res += mx_mysql_bind_var(result, idx++, uint64, &(j->group_id)); - res += mx_mysql_bind_var(result, idx++, string, &(j->job_workdir)); res += mx_mysql_bind_var(result, idx++, uint16, &(j->job_argc)); res += mx_mysql_bind_var(result, idx++, string, &(j->job_argv_str)); res += mx_mysql_bind_var(result, idx++, string, &(j->job_stdout)); res += mx_mysql_bind_var(result, idx++, string, &(j->job_stderr)); - res += mx_mysql_bind_var(result, idx++, uint32, &(j->job_umask)); res += mx_mysql_bind_var(result, idx++, string, &(j->host_submit)); res += mx_mysql_bind_var(result, idx++, string, &(j->host_id)); res += mx_mysql_bind_var(result, idx++, string, &(j->server_id)); res += mx_mysql_bind_var(result, idx++, string, &(j->host_hostname)); - res += mx_mysql_bind_var(result, idx++, uint32, &(j->host_pid)); res += mx_mysql_bind_var(result, idx++, uint32, &(j->host_slots)); res += mx_mysql_bind_var(result, idx++, int64, &(j->date_submit)); res += mx_mysql_bind_var(result, idx++, int64, &(j->date_start)); res += mx_mysql_bind_var(result, idx++, int64, &(j->date_end)); - + res += mx_mysql_bind_var(result, idx++, uint64, &(j->stats_max_sumrss)); res += mx_mysql_bind_var(result, idx++, int32, &(j->stats_status)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_utime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_utime.tv_usec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_stime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, 
&(j->stats_rusage.ru_stime.tv_usec)); - res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_realtime.tv_sec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_realtime.tv_usec)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_maxrss)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_minflt)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_majflt)); - res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_nswap)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_inblock)); res += mx_mysql_bind_var(result, idx++, int64, &(j->stats_rusage.ru_oublock)); @@ -457,6 +447,7 @@ int mxq_set_job_status_running(struct mx_mysql *mysql, struct mxq_job *job) int mxq_set_job_status_exited(struct mx_mysql *mysql, struct mxq_job *job) { int res; + int idx; uint16_t newstatus; struct mx_mysql_bind param = {0}; @@ -485,6 +476,7 @@ int mxq_set_job_status_exited(struct mx_mysql *mysql, struct mxq_job *job) "UPDATE mxq_job SET" " job_status = ?," " date_end = NULL," + " stats_max_sumrss = ?, " " stats_status = ?, " " stats_utime_sec = ?, " " stats_utime_usec = ?, " @@ -506,30 +498,32 @@ int mxq_set_job_status_exited(struct mx_mysql *mysql, struct mxq_job *job) " AND server_id = ?" 
" AND host_pid = ?"; - res = mx_mysql_bind_init_param(&param, 20); + res = mx_mysql_bind_init_param(&param, 21); assert(res == 0); + idx = 0; res = 0; - res += mx_mysql_bind_var(&param, 0, uint16, &(newstatus)); - res += mx_mysql_bind_var(&param, 1, int32, &(job->stats_status)); - res += mx_mysql_bind_var(&param, 2, int64, &(job->stats_rusage.ru_utime.tv_sec)); - res += mx_mysql_bind_var(&param, 3, int64, &(job->stats_rusage.ru_utime.tv_usec)); - res += mx_mysql_bind_var(&param, 4, int64, &(job->stats_rusage.ru_stime.tv_sec)); - res += mx_mysql_bind_var(&param, 5, int64, &(job->stats_rusage.ru_stime.tv_usec)); - res += mx_mysql_bind_var(&param, 6, int64, &(job->stats_realtime.tv_sec)); - res += mx_mysql_bind_var(&param, 7, int64, &(job->stats_realtime.tv_usec)); - res += mx_mysql_bind_var(&param, 8, int64, &(job->stats_rusage.ru_maxrss)); - res += mx_mysql_bind_var(&param, 9, int64, &(job->stats_rusage.ru_minflt)); - res += mx_mysql_bind_var(&param, 10, int64, &(job->stats_rusage.ru_majflt)); - res += mx_mysql_bind_var(&param, 11, int64, &(job->stats_rusage.ru_nswap)); - res += mx_mysql_bind_var(&param, 12, int64, &(job->stats_rusage.ru_inblock)); - res += mx_mysql_bind_var(&param, 13, int64, &(job->stats_rusage.ru_oublock)); - res += mx_mysql_bind_var(&param, 14, int64, &(job->stats_rusage.ru_nvcsw)); - res += mx_mysql_bind_var(&param, 15, int64, &(job->stats_rusage.ru_nivcsw)); - res += mx_mysql_bind_var(&param, 16, uint64, &(job->job_id)); - res += mx_mysql_bind_var(&param, 17, string, &(job->host_hostname)); - res += mx_mysql_bind_var(&param, 18, string, &(job->server_id)); - res += mx_mysql_bind_var(&param, 19, uint32, &(job->host_pid)); + res += mx_mysql_bind_var(&param, idx++, uint16, &(newstatus)); + res += mx_mysql_bind_var(&param, idx++, uint64, &(job->stats_max_sumrss)); + res += mx_mysql_bind_var(&param, idx++, int32, &(job->stats_status)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_utime.tv_sec)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_utime.tv_usec)); + res += mx_mysql_bind_var(&param, idx++, int64, 
&(job->stats_rusage.ru_stime.tv_sec)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_stime.tv_usec)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_realtime.tv_sec)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_realtime.tv_usec)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_maxrss)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_minflt)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_majflt)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_nswap)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_inblock)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_oublock)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_nvcsw)); + res += mx_mysql_bind_var(&param, idx++, int64, &(job->stats_rusage.ru_nivcsw)); + res += mx_mysql_bind_var(&param, idx++, uint64, &(job->job_id)); + res += mx_mysql_bind_var(&param, idx++, string, &(job->host_hostname)); + res += mx_mysql_bind_var(&param, idx++, string, &(job->server_id)); + res += mx_mysql_bind_var(&param, idx++, uint32, &(job->host_pid)); assert(res == 0); res = mx_mysql_do_statement_noresult_retry_on_fail(mysql, query, &param); diff --git a/mxq_job.h b/mxq_job.h index 42b2d81..f29baf6 100644 --- a/mxq_job.h +++ b/mxq_job.h @@ -52,6 +52,8 @@ struct mxq_job { struct timeval stats_starttime; + uint64_t stats_max_sumrss; + int32_t stats_status; struct timeval stats_realtime; struct rusage stats_rusage; diff --git a/mxqd.c b/mxqd.c index 43afc3d..bec2797 100644 --- a/mxqd.c +++ b/mxqd.c @@ -1638,19 +1638,19 @@ int killall_over_memory(struct mxq_server *server) continue; } - memory = pinfo->sum_rss * pagesize / 1024 / 1024; + memory = pinfo->sum_rss * pagesize / 1024; if (job->max_sum_rss < memory) job->max_sum_rss = memory; - if (memory <= group->group.job_memory) + if (memory/1024 <= group->group.job_memory) continue; - mx_log_info("killall_over_memory(): 
used(%llu) > requested(%llu): Sending signal=KILL to job=%s(%d):%lu:%lu pgrp=%d", - memory, group->group.job_memory, + mx_log_info("killall_over_memory(): used(%lluMiB) > requested(%lluMiB): Sending signal=TERM to job=%s(%d):%lu:%lu pid=%d", + memory/1024, group->group.job_memory, group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid); - kill(-pid, SIGKILL); + kill(pid, SIGTERM); } } } @@ -1762,7 +1762,7 @@ int catchall(struct mxq_server *server) { g = &job->group->group; timersub(&now, &j->stats_starttime, &j->stats_realtime); - + j->stats_max_sumrss = job->max_sum_rss; j->stats_status = status; j->stats_rusage = rusage; diff --git a/mxqdump.c b/mxqdump.c index 2cac9c1..8b0b966 100644 --- a/mxqdump.c +++ b/mxqdump.c @@ -128,6 +128,7 @@ static int print_group(struct mxq_group *g) " max_utime=%lu" " max_real=%lu" " max_memory=%lukiB" + " max_rss=%lukiB" " wait_sec=%lu" " run_sec=%lu" " idle_sec=%lu" @@ -150,10 +151,11 @@ static int print_group(struct mxq_group *g) g->job_threads, g->job_memory*1024, g->job_time*60, - (100UL*(uint64_t)g->stats_max_maxrss/1024UL/g->job_memory), + (100UL*(uint64_t)g->stats_max_sumrss/1024UL/g->job_memory), (100UL*(uint64_t)g->stats_max_real.tv_sec/60UL/g->job_time), g->stats_max_utime.tv_sec, g->stats_max_real.tv_sec, + g->stats_max_sumrss, g->stats_max_maxrss, g->stats_wait_sec, g->stats_run_sec, @@ -219,6 +221,7 @@ static int print_job(struct mxq_group *g, struct mxq_job *j) " runtime_requested=%us" " time_load=%lu%%" " memory_requested=%lukiB" + " max_memory=%lukiB" " max_rss=%lukiB" " memory_load=%lu%%" " threads=%d" @@ -244,8 +247,9 @@ static int print_job(struct mxq_group *g, struct mxq_job *j) g->job_time*60, (100UL*(run_sec)/60UL/g->job_time), g->job_memory*1024, + j->stats_max_sumrss, j->stats_rusage.ru_maxrss, - (100UL*j->stats_rusage.ru_maxrss/1024UL/g->job_memory), + (100UL*j->stats_max_sumrss/1024UL/g->job_memory), g->job_threads, j->host_slots, mxq_job_status_to_name(j->job_status), 
diff --git a/mysql/alter_tables_0.17.0.sql b/mysql/alter_tables_0.17.0.sql new file mode 100644 index 0000000..b9daa72 --- /dev/null +++ b/mysql/alter_tables_0.17.0.sql @@ -0,0 +1,11 @@ +ALTER TABLE mxq_group + ADD COLUMN + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0 + AFTER + group_date_end; + +ALTER TABLE mxq_job + ADD COLUMN + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0 + AFTER + job_id_first; diff --git a/mysql/create_tables.sql b/mysql/create_tables.sql index 978c679..f2566cc 100644 --- a/mysql/create_tables.sql +++ b/mysql/create_tables.sql @@ -34,6 +34,8 @@ CREATE TABLE IF NOT EXISTS mxq_group ( group_date_end TIMESTAMP NOT NULL DEFAULT 0, + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0, + stats_max_maxrss INT8 UNSIGNED NOT NULL DEFAULT 0, stats_max_utime_sec INT8 UNSIGNED NOT NULL DEFAULT 0, stats_max_stime_sec INT8 UNSIGNED NOT NULL DEFAULT 0, @@ -94,6 +96,8 @@ CREATE TABLE IF NOT EXISTS mxq_job ( job_id_old INT8 UNSIGNED NULL DEFAULT NULL, job_id_first INT8 UNSIGNED NULL DEFAULT NULL, + stats_max_sumrss INT8 UNSIGNED NOT NULL DEFAULT 0, + stats_status INT4 UNSIGNED NOT NULL DEFAULT 0, stats_utime_sec INT8 UNSIGNED NOT NULL DEFAULT 0, diff --git a/mysql/create_trigger.sql b/mysql/create_trigger.sql index ccd2af6..c4269ac 100644 --- a/mysql/create_trigger.sql +++ b/mysql/create_trigger.sql @@ -64,6 +64,7 @@ CREATE TRIGGER mxq_update_job BEFORE UPDATE ON mxq_job group_slots_running=group_slots_running-NEW.host_slots, group_jobs_running=group_jobs_running-1, group_jobs_failed=group_jobs_failed+1, + stats_max_sumrss=GREATEST(stats_max_sumrss, NEW.stats_max_sumrss), stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), @@ -99,6 +100,7 @@ CREATE TRIGGER mxq_update_job BEFORE UPDATE ON mxq_job group_slots_running=group_slots_running-NEW.host_slots, group_jobs_running=group_jobs_running-1, 
group_jobs_finished=group_jobs_finished+1, + stats_max_sumrss=GREATEST(stats_max_sumrss, NEW.stats_max_sumrss), stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), @@ -113,6 +115,7 @@ CREATE TRIGGER mxq_update_job BEFORE UPDATE ON mxq_job WHERE group_id=NEW.group_id; ELSEIF NEW.job_status NOT IN (399, 755, 989, 990) THEN UPDATE mxq_group SET + stats_max_sumrss=GREATEST(stats_max_sumrss, NEW.stats_max_sumrss), stats_max_maxrss=GREATEST(stats_max_maxrss, NEW.stats_maxrss), stats_max_utime_sec=GREATEST(stats_max_utime_sec, NEW.stats_utime_sec), stats_max_stime_sec=GREATEST(stats_max_stime_sec, NEW.stats_stime_sec), diff --git a/web/pages/mxq/mxq.in b/web/pages/mxq/mxq.in index 6a4f8b2..db24812 100755 --- a/web/pages/mxq/mxq.in +++ b/web/pages/mxq/mxq.in @@ -269,6 +269,7 @@ group_mtime : $o{group_mtime} group_date_end : $o{group_date_end} +stats_max_sumrss : $o{stats_max_sumrss} kiB stats_max_maxrss : $o{stats_max_maxrss} stats_max_utime_sec : $o{stats_max_utime_sec} stats_max_stime_sec : $o{stats_max_stime_sec} @@ -364,6 +365,8 @@ job_id_new : $o{job_id_new} job_id_old : $o{job_id_old} job_id_first : $o{job_id_first} +stats_max_sumrss : $o{stats_max_sumrss} kiB + stats_status : $o{stats_status} stats_utime_sec : $o{stats_utime_sec}