Skip to content

Commit

Permalink
mxqd: be less verbose and adjust sleep times
Browse files Browse the repository at this point in the history
  • Loading branch information
mariux committed Dec 17, 2014
1 parent d9f8600 commit f8ea1d6
Showing 1 changed file with 58 additions and 36 deletions.
94 changes: 58 additions & 36 deletions mxqd.c
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,12 @@ void group_init(struct mxq_group_list *group)
struct mxq_group *g;

long double memory_threads;
long double memory_per_thread;
long double memory_max_available;
unsigned long slots_per_job;
unsigned long jobs_max;
unsigned long slots_max;
unsigned long memory_max;

assert(group);
assert(group->user);
Expand All @@ -225,35 +231,49 @@ void group_init(struct mxq_group_list *group)
s = group->user->server;
g = &group->group;

group->orphaned = 0;

group->memory_per_thread = (long double)g->job_memory / (long double) g->job_threads;
group->memory_max_available = s->memory_total * s->memory_max_per_slot / group->memory_per_thread;
memory_per_thread = (long double)g->job_memory / (long double) g->job_threads;
memory_max_available = (long double)s->memory_total * (long double)s->memory_max_per_slot / memory_per_thread;

if (group->memory_max_available > s->memory_total)
group->memory_max_available = s->memory_total;
if (memory_max_available > s->memory_total)
memory_max_available = s->memory_total;

group->slots_per_job = ceill((long double)g->job_memory / s->memory_avg_per_slot);
slots_per_job = ceill((long double)g->job_memory / s->memory_avg_per_slot);

if (group->slots_per_job < g->job_threads)
group->slots_per_job = g->job_threads;
if (slots_per_job < g->job_threads)
slots_per_job = g->job_threads;

memory_threads = group->memory_max_available / group->memory_per_thread;
memory_threads = memory_max_available / memory_per_thread;

if (group->memory_per_thread > s->memory_max_per_slot) {
group->jobs_max = memory_threads + 0.5;
} else if (group->memory_per_thread > s->memory_avg_per_slot) {
group->jobs_max = memory_threads + 0.5;
if (memory_per_thread > s->memory_max_per_slot) {
jobs_max = memory_threads + 0.5;
} else if (memory_per_thread > s->memory_avg_per_slot) {
jobs_max = memory_threads + 0.5;
} else {
group->jobs_max = s->slots;
jobs_max = s->slots;
}
jobs_max /= g->job_threads;

slots_max = jobs_max * slots_per_job;
memory_max = jobs_max * g->job_memory;

if (group->memory_per_thread != memory_per_thread
|| group->memory_max_available != memory_max_available
|| group->memory_max_available != memory_max_available
|| group->slots_per_job != slots_per_job
|| group->jobs_max != jobs_max
|| group->slots_max != slots_max
|| group->memory_max != memory_max)
MXQ_LOG_INFO(" group=%s(%u):%lu jobs_max=%lu slots_max=%lu memory_max=%lu slots_per_job=%lu :: group %sinitialized.\n",
g->user_name, g->user_uid, g->group_id, jobs_max, slots_max, memory_max, slots_per_job,
group->orphaned?"re":"");

group->jobs_max /= g->job_threads;
group->slots_max = group->jobs_max * group->slots_per_job;
group->memory_max = group->jobs_max * g->job_memory;

MXQ_LOG_INFO(" group=%s(%u):%lu jobs_max=%lu slots_max=%lu memory_max=%lu slots_per_job=%lu :: group initialized.\n",
g->user_name, g->user_uid, g->group_id, group->jobs_max, group->slots_max, group->memory_max, group->slots_per_job);
group->orphaned = 0;
group->memory_per_thread = memory_per_thread;
group->memory_max_available = memory_max_available;
group->slots_per_job = slots_per_job;
group->jobs_max = jobs_max;
group->slots_max = slots_max;
group->memory_max = memory_max;
}

/**********************************************************************/
Expand Down Expand Up @@ -810,8 +830,8 @@ unsigned long start_user(struct mxq_user_list *user, int job_limit, long slots_t

assert(slots_to_start <= server->slots - server->slots_running);

MXQ_LOG_INFO(" user=%s(%d) slots_to_start=%ld job_limit=%d :: trying to start jobs for user.\n",
mxqgrp->user_name, mxqgrp->user_uid, slots_to_start, job_limit);
// MXQ_LOG_INFO(" user=%s(%d) slots_to_start=%ld job_limit=%d :: trying to start jobs for user.\n",
// mxqgrp->user_name, mxqgrp->user_uid, slots_to_start, job_limit);

for (group=user->groups; group && slots_to_start > 0 && (!job_limit || jobs_started < job_limit); group=gnext) {

Expand Down Expand Up @@ -902,7 +922,7 @@ unsigned long start_users(struct mxq_server *server)
if (!server->user_cnt)
return 0;

MXQ_LOG_INFO("=== starting jobs on free_slots=%lu slots for user_cnt=%lu users\n", server->slots - server->slots_running, server->user_cnt);
//MXQ_LOG_INFO("=== starting jobs on free_slots=%lu slots for user_cnt=%lu users\n", server->slots - server->slots_running, server->user_cnt);

for (user=server->users; user; user=user->next) {

Expand Down Expand Up @@ -931,9 +951,6 @@ unsigned long start_users(struct mxq_server *server)
}
}

MXQ_LOG_INFO("server-stats:\t%6lu of %6lu MiB\tallocated\n", server->memory_used, server->memory_total);
MXQ_LOG_INFO("\t%6lu of %6lu slots\tallocated for %lu running threads (%lu jobs)\n", server->slots_running, server->slots, server->threads_running, server->jobs_running);

return slots_started_total;
}

Expand Down Expand Up @@ -1018,6 +1035,9 @@ void server_dump(struct mxq_server *server)
}
}
}

MXQ_LOG_INFO("memory_used=%lu memory_total=%lu\n", server->memory_used, server->memory_total);
MXQ_LOG_INFO("slots_running=%lu slots=%lu threads_running=%lu jobs_running=%lu\n", server->slots_running, server->slots, server->threads_running, server->jobs_running);
MXQ_LOG_INFO("====================== SERVER DUMP END ======================\n");
}

Expand Down Expand Up @@ -1168,8 +1188,8 @@ int main(int argc, char *argv[])
struct mxq_server server;
struct mxq_group_list *group;

unsigned long slots_started;
unsigned long slots_returned;
unsigned long slots_started = 0;
unsigned long slots_returned = 0;

int i;
int res;
Expand Down Expand Up @@ -1210,11 +1230,13 @@ int main(int argc, char *argv[])
if (slots_returned)
MXQ_LOG_INFO("slots_returned=%lu :: Main Loop freed %lu slots.\n", slots_returned, slots_returned);

server_dump(&server);
if (slots_started || slots_returned) {
server_dump(&server);
}

group_cnt = load_groups(&server);
if (group_cnt)
MXQ_LOG_INFO("group_cnt=%d :: %d Groups loaded\n", group_cnt, group_cnt);
// if (group_cnt)
// MXQ_LOG_INFO("group_cnt=%d :: %d Groups loaded\n", group_cnt, group_cnt);

if (!server.group_cnt) {
assert(!server.jobs_running);
Expand All @@ -1225,8 +1247,8 @@ int main(int argc, char *argv[])
}

if (server.slots_running == server.slots) {
MXQ_LOG_INFO("All slots running. Sleeping for a short while.\n");
sleep(20);
MXQ_LOG_INFO("All slots running. Sleeping for a short while (7 seconds).\n");
sleep(7);
continue;
}

Expand All @@ -1236,8 +1258,8 @@ int main(int argc, char *argv[])

if (!slots_started && !slots_returned) {
if (!server.jobs_running) {
MXQ_LOG_INFO("Tried Hard and nobody is doing anything. Sleeping for a long while (30 seconds).\n");
sleep(30);
MXQ_LOG_INFO("Tried Hard and nobody is doing anything. Sleeping for a long while (15 seconds).\n");
sleep(15);
} else {
MXQ_LOG_INFO("Tried Hard. But have done nothing. Sleeping for a very short while.\n");
sleep(3);
Expand Down

0 comments on commit f8ea1d6

Please sign in to comment.