Skip to content

Commit

Permalink
mxqd: kill_over_time send SIGXCPU, SIGTERM, SIGHUP and SIGKILL
Browse files Browse the repository at this point in the history
job time

  - >100.0% and <101.6% send main process SIGXCPU

  - >101.6% send process group SIGXCPU

  - >105.0% send main process SIGTERM and process group SIGHUP

  - >106.6% send process group SIGTERM

  - >110.0% send main process and process group SIGKILL
  • Loading branch information
mariux committed Aug 24, 2015
1 parent 16d5830 commit 9fd6580
Showing 1 changed file with 45 additions and 13 deletions.
58 changes: 45 additions & 13 deletions mxqd.c
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ int killall(struct mxq_server *server, int sig, unsigned int pgrp)
return 0;
}

int killall_over_time(struct mxq_server *server, int sig, unsigned int pgrp)
int killall_over_time(struct mxq_server *server)
{
struct mxq_user_list *user;
struct mxq_group_list *group;
Expand All @@ -1369,17 +1369,50 @@ int killall_over_time(struct mxq_server *server, int sig, unsigned int pgrp)
for (job=group->jobs; job; job=job->next) {
timersub(&now, &job->job.stats_starttime, &delta);

if (delta.tv_sec <= group->group.job_time*63)
if (delta.tv_sec <= group->group.job_time*60)
continue;

pid = job->job.host_pid;
if (pgrp)
pid = -pid;
mx_log_info("killall_over_time(): Sending signal=%d to job=%s(%d):%lu:%lu %s=%d",
sig,
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id,
pgrp?"pgrp":"pid", pid);
kill(pid, sig);

if (delta.tv_sec <= group->group.job_time*61) {
mx_log_debug("killall_over_time(): Sending signal=XCPU to job=%s(%d):%lu:%lu pid=%d",
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid);
kill(pid, SIGXCPU);
continue;
}

mx_log_debug("killall_over_time(): Sending signal=XCPU to job=%s(%d):%lu:%lu pgrp=%d",
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid);
kill(-pid, SIGXCPU);

if (delta.tv_sec <= group->group.job_time*63)
continue;

mx_log_info("killall_over_time(): Sending signal=TERM to job=%s(%d):%lu:%lu pid=%d",
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid);
kill(pid, SIGTERM);

mx_log_info("killall_over_time(): Sending signal=HUP to job=%s(%d):%lu:%lu pgrp=%d",
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid);
kill(-pid, SIGHUP);

if (delta.tv_sec <= group->group.job_time*64)
continue;

mx_log_info("killall_over_time(): Sending signal=TERM to job=%s(%d):%lu:%lu pgrp=%d",
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid);
kill(-pid, SIGTERM);

if (delta.tv_sec <= group->group.job_time*66)
continue;

mx_log_info("killall_over_time(): Sending signal=KILL to job=%s(%d):%lu:%lu pid=%d",
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid);
kill(pid, SIGKILL);

mx_log_info("killall_over_time(): Sending signal=KILL to job=%s(%d):%lu:%lu pgrp=%d",
group->group.user_name, group->group.user_uid, group->group.group_id, job->job.job_id, pid);
kill(-pid, SIGKILL);
}
}
}
Expand Down Expand Up @@ -1672,8 +1705,7 @@ int main(int argc, char *argv[])

killallcancelled(&server, SIGTERM, 0);
killallcancelled(&server, SIGINT, 0);
killall_over_time(&server, SIGTERM, 0);
killall_over_time(&server, SIGINT, 0);
killall_over_time(&server);

if (!server.group_cnt) {
assert(!server.jobs_running);
Expand Down Expand Up @@ -1720,8 +1752,8 @@ int main(int argc, char *argv[])

killallcancelled(&server, SIGTERM, 0);
killallcancelled(&server, SIGINT, 0);
killall_over_time(&server, SIGTERM, 0);
killall_over_time(&server, SIGINT, 0);
killall_over_time(&server);
killall_over_time(&server);

mx_log_info("jobs_running=%lu global_sigint_cnt=%d global_sigterm_cnt=%d : Exiting. Wating for jobs to finish. Sleeping for a while.",
server.jobs_running, global_sigint_cnt, global_sigterm_cnt);
Expand Down

0 comments on commit 9fd6580

Please sign in to comment.