From 117c12b624d35499f5f36f2ccada089251009d4c Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Wed, 5 Jul 2017 20:21:48 +0200 Subject: [PATCH] mxqd: Change poll times Use 10 seconds everywhere to slightly decrease the load on the database and the races between the mxq daemons. At the same time, this increases the chance that multiple jobs of the same group are started on the same server, which is good (better use of caches, smaller failure surface). This is the maximum time a single server will need to react to database changes (mxqsub or mxqkill). Administrative signals will get an immediate reaction. Finished user jobs will usually also get an immediate reaction. However, this is not true for jobs we picked up from a previous daemon incarnation and which are not our children. If these jobs finish, we will not get a signal, so we need to look into the spool directory from time to time. This is another reason why we need a timeout at all. Now that we want to use 10 seconds everywhere, we can make it a constant. 
--- mxqd.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/mxqd.c b/mxqd.c index ce78837d..3d8cbf49 100644 --- a/mxqd.c +++ b/mxqd.c @@ -2338,7 +2338,7 @@ int main(int argc, char *argv[]) int res; int fail = 0; - struct timespec poll_interval={0,0}; + static struct timespec poll_interval={10,0}; /* 10 seconds */ siginfo_t siginfo; int saved_argc; @@ -2427,7 +2427,6 @@ int main(int argc, char *argv[]) assert(!group_cnt); mxq_daemon_set_status(server->mysql, daemon, MXQ_DAEMON_STATUS_IDLE); mx_log_debug("Nothing to do"); - poll_interval.tv_sec=1; continue; } @@ -2440,7 +2439,6 @@ int main(int argc, char *argv[]) mxq_daemon_set_status(server->mysql, daemon, MXQ_DAEMON_STATUS_FULL); } mx_log_debug("All slots running"); - poll_interval.tv_sec=7; continue; } @@ -2456,7 +2454,6 @@ int main(int argc, char *argv[]) if (res<0) { mx_log_info("No more slots started because we have users waiting for free slots"); mxq_daemon_set_status(server->mysql, daemon, MXQ_DAEMON_STATUS_WAITING); - poll_interval.tv_sec=3; continue; } @@ -2464,11 +2461,9 @@ int main(int argc, char *argv[]) if (!server->jobs_running) { mxq_daemon_set_status(server->mysql, daemon, MXQ_DAEMON_STATUS_IDLE); mx_log_debug("Tried Hard and nobody is doing anything."); - poll_interval.tv_sec=15; } else { mxq_daemon_set_status(server->mysql, daemon, MXQ_DAEMON_STATUS_RUNNING); mx_log_debug("Tried Hard. But have done nothing."); - poll_interval.tv_sec=3; } continue; } @@ -2484,7 +2479,6 @@ int main(int argc, char *argv[]) /* while not quitting and not restarting -> wait for and collect all running jobs */ mxq_daemon_set_status(server->mysql, daemon, MXQ_DAEMON_STATUS_TERMINATING); - poll_interval.tv_sec=1; while (server->jobs_running && !global_sigquit_cnt && !global_sigrestart_cnt && !fail) { slots_returned = catchall(server); slots_returned += fspool_scan(server);