From 0b64266ce137fa7a0f109953cab0e9f6c871697b Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Sat, 11 Apr 2020 15:25:35 +0200 Subject: [PATCH] mxqd: Only start jobs we are qualified for Check whether this server is qualified to start jobs from a group. Lazy-evaluate qualification criteria and cache the result. Don't start jobs we are not qualified for. For now, the only qualification criterion is that our short or fully qualified hostname is not included in the excluded_servers set of the group. This can later be expanded to additional criteria (e.g. hostconfig or processor flags). --- mxqd.c | 34 ++++++++++++++++++++++++++++++++++ mxqd.h | 4 ++++ mxqd_control.c | 3 +++ 3 files changed, 41 insertions(+) diff --git a/mxqd.c b/mxqd.c index c1106aba..ff211eac 100644 --- a/mxqd.c +++ b/mxqd.c @@ -43,6 +43,7 @@ #include "mxq.h" #include "mxqd_control.h" +#include "keywordset.h" #ifndef MXQ_INITIAL_PATH # define MXQ_INITIAL_PATH "/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin:/usr/local/bin" @@ -546,6 +547,16 @@ int server_init(struct mxq_server *server, int argc, char *argv[]) } server->hostname = arg_hostname; + { + char *dot=index(arg_hostname,'.'); + if (dot) { + server->hostname_short = mx_malloc_forever(dot-arg_hostname+1); + strncpy(server->hostname_short, arg_hostname, dot-arg_hostname); + server->hostname_short[dot-arg_hostname] = 0; + } else + server->hostname_short = mx_strdup_forever(arg_hostname); + } + server->daemon_name = arg_daemon_name; server->initial_path = arg_initial_path; server->initial_tmpdir = arg_initial_tmpdir; @@ -1277,6 +1288,26 @@ unsigned long start_job(struct mxq_group_list *glist) /**********************************************************************/ +static int server_is_qualified(struct mxq_server *server, struct mxq_group *group) { + int is_qualified = 1; + if (*group->group_disabled_servers != 0) { + struct keywordset *kws = keywordset_new(group->group_disabled_servers); + if ( keywordset_ismember(kws, server->hostname_short) + 
|| keywordset_ismember(kws, server->hostname) ) + is_qualified = 0; + keywordset_free(kws); + } + return (is_qualified); +} + +static int server_is_qualified_cached(struct mxq_server *server, struct mxq_group_list *glist) { + if (!glist->server_is_qualified_evaluated) { + glist->server_is_qualified = server_is_qualified(server, &glist->group); + glist->server_is_qualified_evaluated = 1; + } + return glist->server_is_qualified; +} + unsigned long start_user(struct mxq_user_list *ulist, long slots_to_start) { struct mxq_server *server; @@ -1319,6 +1350,8 @@ unsigned long start_user(struct mxq_user_list *ulist, long slots_to_start) if (df_scratch/1024/1024/1024 < group->job_tmpdir_size + 20) { continue; } + if (!server_is_qualified_cached(server, glist)) + continue; mx_log_info(" group=%s(%d):%lu slots_to_start=%ld slots_per_job=%lu :: trying to start job for group.", group->user_name, group->user_uid, group->group_id, slots_to_start, glist->slots_per_job); @@ -1472,6 +1505,7 @@ void server_free(struct mxq_server *server) mx_free_null(server->finished_jobsdir); mx_flock_free(server->flock); mx_free_null(server->supgid); + mx_free_null(server->hostname_short); mx_log_finish(); } diff --git a/mxqd.h b/mxqd.h index 5c9ce7e3..87aa2236 100644 --- a/mxqd.h +++ b/mxqd.h @@ -43,6 +43,9 @@ struct mxq_group_list { unsigned long global_threads_running; unsigned long global_slots_running; + int server_is_qualified_evaluated; + int server_is_qualified; + short orphaned; }; @@ -95,6 +98,7 @@ struct mxq_server { unsigned long long int starttime; char *host_id; char *hostname; + char *hostname_short; char *daemon_name; char *pidfilename; char *finished_jobsdir; diff --git a/mxqd_control.c b/mxqd_control.c index 36814e26..bbbd8549 100644 --- a/mxqd_control.c +++ b/mxqd_control.c @@ -103,6 +103,9 @@ static void _group_list_init(struct mxq_group_list *glist) glist->slots_max = slots_max; glist->memory_max = memory_max; + glist->server_is_qualified_evaluated = 0; + 
glist->server_is_qualified = 0; + glist->orphaned = 0; }