From 74ebbc3e9adb43252aaa876ccdf66f7f6d475041 Mon Sep 17 00:00:00 2001 From: Donald Buczek Date: Fri, 20 Aug 2021 13:05:49 +0200 Subject: [PATCH] ppidcache: Add new module Add a module which can be used as a caching layer for mx_utils `mx_proc_get_parent`. The cache can be preloaded with information for all processes, which allows, for example, walking over all descendants of a process. We need this because we want to get rid of process group based signalling of jobs. The main problem with that is that users are able to escape the process group and so we have no way to kill all processes of a job reliably. We want to change that to process-tree based signalling. All mxq user jobs are descendants of the per-job reaper processes and this can't be escaped. Notes: - This module duplicates some of the functionality of mx_proc, which scans in a whole process tree. Maybe this can be consolidated in the future. Currently the process tree scanning part of mx_proc seems to be a bit of overkill (with a lot of mallocs) to use for getting the process topology only. - With CONFIG_PROC_CHILDREN (available since Linux 4.2) we would have /proc/PID/task/TID/children which might be useful to walk descendants without scanning the parents for all processes. - The current process group based signalling has the additional problem that the reaper is part of the process group. So whenever we send SIGKILL to the process group, we effectively kill the reaper, too. As we are going to make `mxqkill` send a SIGTERM (if needed) we'd need to address that if we continued to use process group based signalling. 
--- .gitignore | 2 +- Makefile | 7 ++++ ppidcache.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++ ppidcache.h | 12 ++++++ 4 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 ppidcache.c create mode 100644 ppidcache.h diff --git a/.gitignore b/.gitignore index 3097c28d..61b51d6d 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,7 @@ parser.tab.h parser.tab.o test_parser.o test_parser - +ppidcache.o mxqsub /mxqsub.1 diff --git a/Makefile b/Makefile index 065844af..ae84325c 100644 --- a/Makefile +++ b/Makefile @@ -520,6 +520,13 @@ keywordset.o: xmalloc.h clean: CLEAN += keywordset.o +### ppidcache.o ------------------------------------------------------- + +ppidcache.o: $(mx_util.h) +ppidcache.o: $(mx_proc.h) + +clean: CLEAN += ppidcache.o + ######################################################################## ### mxqd --------------------------------------------------------------- diff --git a/ppidcache.c b/ppidcache.c new file mode 100644 index 00000000..9d07de43 --- /dev/null +++ b/ppidcache.c @@ -0,0 +1,119 @@ +#include <sys/types.h> +#include <dirent.h> +#include "mx_util.h" +#include "ppidcache.h" +#include "mx_proc.h" + +struct entry { + pid_t pid; + pid_t parent; +}; + +struct ppidcache { + int count; + int alloc; + struct entry *entries; +}; + +struct ppidcache *ppidcache_new() { + struct ppidcache *ppidcache = mx_malloc_forever(sizeof(struct ppidcache)); + ppidcache->count = 0; + ppidcache->alloc = 500; + ppidcache->entries = mx_malloc_forever(ppidcache->alloc*sizeof(struct entry)); + return ppidcache; +} + +void ppidcache_free(struct ppidcache *ppidcache) { + free(ppidcache->entries); + free(ppidcache); +} + +static int _ppidcache_find(struct ppidcache *ppidcache, pid_t pid) { + int i; + for (i=0 ; i<ppidcache->count ; i++) { + if (ppidcache->entries[i].pid == pid) + return i; + } + return -1; +} + +pid_t ppidcache_get_ppid(struct ppidcache *ppidcache, pid_t pid) { + int i = _ppidcache_find(ppidcache, pid); + if (i != -1) + return 
ppidcache->entries[i].parent; + if (ppidcache->count == ppidcache->alloc) { + ppidcache->alloc += 100; + struct entry *new = mx_malloc_forever(ppidcache->alloc * sizeof(struct entry)); + memcpy(new, ppidcache->entries, ppidcache->count*sizeof(struct entry)); + free(ppidcache->entries); + ppidcache->entries = new; + } + pid_t parent = mx_proc_get_parent(pid); + ppidcache->entries[ppidcache->count].pid = pid; + ppidcache->entries[ppidcache->count++].parent = parent; + return parent; +} + +int ppidcache_is_descendant(struct ppidcache *ppidcache, pid_t ancestor, pid_t candidate) { + int fuse=100; + pid_t pid = candidate; + do { + pid = ppidcache_get_ppid(ppidcache, pid); + if (pid == 0 || pid == -1) + return 0; + if (pid == ancestor) + return 1; + } while (--fuse > 0); + return 0; +} + +/* + * Load the cache with all pids from the system. + * Previous cached content, if any, is removed. + */ +void ppidcache_scan(struct ppidcache *ppidcache) { + _mx_cleanup_closedir_ DIR *dir = NULL; + pid_t pid; + struct dirent *dirent; + + ppidcache->count = 0; + dir = opendir("/proc"); + if (dir == NULL) { + perror("/proc"); + return; + } + while (1) { + errno = 0; + dirent = readdir(dir); + if (dirent == NULL) { + if (errno) + perror("/proc"); + return; + } + if (strspn(dirent->d_name, "0123456789") != strlen(dirent->d_name)) + continue; + pid = atoi(dirent->d_name); + ppidcache_get_ppid(ppidcache, pid); + } +} + +/* + * call cb(data, pid) for all cached(!) descendants. 
+ * stop when no more descendants or when callback returns 0 + */ +void ppidcache_do_descendants( + struct ppidcache *ppidcache, + pid_t pid, + int cb(void *data, pid_t pid), + void *data) +{ + int next; + pid_t candidate; + + for (next=0 ; next<ppidcache->count; next++) { + candidate = ppidcache->entries[next].pid; + if (ppidcache_is_descendant(ppidcache, pid, candidate)) + if ((*cb)(data, candidate) == 0) + return; + } +} diff --git a/ppidcache.h b/ppidcache.h new file mode 100644 index 00000000..ca4a7e37 --- /dev/null +++ b/ppidcache.h @@ -0,0 +1,12 @@ +#ifndef _PPIDCACHE_H +#define _PPIDCACHE_H + +#include <sys/types.h> + +struct ppidcache *ppidcache_new(); +void ppidcache_free (struct ppidcache *ppidcache); +pid_t ppidcache_get_ppid(struct ppidcache *ppidcache, pid_t pid); +int ppidcache_is_descendant(struct ppidcache *ppidcache, pid_t ancestor, pid_t candidate); +void ppidcache_scan(struct ppidcache *ppidcache); +void ppidcache_do_descendants(struct ppidcache *ppidcache, pid_t pid, int cb(void *data, pid_t pid), void *data); +#endif