diff --git a/Makefile b/Makefile
index 5b12cf4..f1548e9 100644
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,7 @@ HELPER_BEE_SHELL+=bee-remove
 HELPER_BEE_SHELL+=bee-update
 
 HELPER_C+=bee-cache-inventory
+HELPER_C+=beeindextr
 
 HELPER_SHELL+=compat-filesfile2contentfile
 HELPER_SHELL+=compat-fixmetadir
@@ -134,6 +135,7 @@ BEESORT_OBJECTS=bee_tree.o bee_version_compare.o bee_version_output.o bee_versio
 BEEGETOPT_OBJECTS=bee_getopt.o beegetopt.o
 BEEFLOCK_OBJECTS=bee_getopt.o beeflock.o
 BEECACHEINVENTORY_OBJECTS=bee-cache-inventory.o bee_getopt.o
+BEEICANONDIRS_OBJECTS=beeindextr.o
 
 bee_BUILDTYPES=$(addsuffix .sh,$(addprefix buildtypes/,$(BUILDTYPES)))
 
@@ -166,6 +168,9 @@ beeflock: $(addprefix src/, ${BEEFLOCK_OBJECTS})
 bee-cache-inventory: $(addprefix src/, ${BEECACHEINVENTORY_OBJECTS})
 	$(call quiet-command,${CC} ${LDFLAGS} -o $@ $^,"LD $@")
 
+beeindextr: $(addprefix src/, ${BEEICANONDIRS_OBJECTS})
+	$(call quiet-command,${CC} ${LDFLAGS} -o $@ $^,"LD $@")
+
 %.o: %.c
 	$(call quiet-command,${CC} ${CFLAGS} -o $@ -c $^,"CC $@")
 
diff --git a/src/beeindextr.c b/src/beeindextr.c
new file mode 100644
index 0000000..e3f01aa
--- /dev/null
+++ b/src/beeindextr.c
@@ -0,0 +1,560 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/*
+ * beeindextr: rewrite the path column of inventory lines.
+ *
+ * Every input line must consist of seven whitespace separated words followed
+ * by an absolute path. The line is printed again with the path canonicalized:
+ * redundant '/', "." and ".." are removed and symlinks are followed. The last
+ * path component is only checked for being a symlink if a '/' follows it.
+ */
+
+// upper bound for symlink expansions per path, protects against symlink loops
+#define MAX_SYMLINK_FOLLOWS 40
+
+// print a printf-style message to stderr and terminate with exit status 1.
+// note: _exit() is used, which skips the stdio flush done by exit()
+__attribute__((format (printf, 1, 2)))
+static void die(const char *restrict fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+    _exit(1);
+}
+
+#define offsetof(type, member) __builtin_offsetof (type, member)
+
+#define container_of(ptr, type, member) \
+    ((type *) ((char *) (ptr) - offsetof(type, member)))
+
+// malloc() which dies instead of returning NULL
+static void *malloc_nofail(size_t size) {
+    void *ret = malloc(size);
+    if (ret == NULL)
+        die("%m\n");
+    return ret;
+}
+
+// like malloc_nofail(), but the returned memory is zeroed
+static void *zmalloc_nofail(size_t size) {
+    void *ret = malloc_nofail(size);
+    memset(ret, 0, size);
+    return ret;
+}
+
+// strdup() which dies instead of returning NULL
+static char *strdup_nofail(const char *s) {
+    char *dst = strdup(s);
+    if (dst == NULL)
+        die("%m\n");
+    return dst;
+}
+
+/*** Simple string to u32 hash implementation ***/
+
+// per character: rotate the hash left by five bits, then xor in the character
+static uint32_t naive_hash(const char *s) {
+    uint32_t h = 0;
+    while (*s) {
+        uint32_t highbits = h & 0xf8000000;
+        h = h << 5;
+        h = h ^ (highbits >> 27);
+        h = h ^ *s++;
+    }
+    return h;
+}
+
+/************ simple readlink cache *************/
+
+struct readlink_cache_entry {
+    char *name;
+    uint32_t hash;
+    ssize_t result;
+    int saved_errno;    // valid if result < 0 otherwise 0
+    char *target;       // not zero terminated , NULL when unused
+};
+
+static struct readlink_cache {
+    int slots;
+    int used;
+    struct readlink_cache_entry *entry;
+} readlink_cache;
+
+// release all memory held by the readlink cache
+__attribute__((unused))
+static void readlink_cache_free(void) {
+    struct readlink_cache *c = &readlink_cache;
+    for (int i=0 ; i < c->used ; i++) {
+        free(c->entry[i].name);
+        free(c->entry[i].target);
+    }
+    c->slots = 0;
+    c->used = 0;
+    free(c->entry);
+    c->entry = NULL;
+}
+
+// append the outcome of a readlink(name) call to the cache. name and, for
+// result > 0, the first result bytes of target are copied
+static void readlink_cache_add(const char *name, ssize_t result, int saved_errno, char *target) {
+    struct readlink_cache *c = &readlink_cache;
+    if (c->slots == 0) {
+        c->slots = 200;
+        c->entry = zmalloc_nofail(200 * sizeof(*c->entry));
+    } else if (c->used >= c->slots) {
+        // double the table; zmalloc_nofail() already zeroed the new upper half
+        int new_slots = c->slots + c->slots;
+        struct readlink_cache_entry *new_entry = zmalloc_nofail(new_slots * sizeof(*new_entry));
+        memcpy(new_entry, c->entry, c->slots * sizeof(*new_entry));
+        c->slots = new_slots;
+        free(c->entry);
+        c->entry = new_entry;
+    }
+    c->entry[c->used].name = strdup_nofail(name);
+    c->entry[c->used].hash = naive_hash(name);
+    c->entry[c->used].result = result;
+    c->entry[c->used].saved_errno = saved_errno;
+    if (result > 0) {
+        c->entry[c->used].target = malloc_nofail(result);
+        memcpy(c->entry[c->used].target, target, result);
+    }
+    c->used++;
+}
+
+// readlink() replacement: the first call for a pathname goes to readlink(),
+// later calls for the same pathname replay the stored result and errno
+static ssize_t readlink_cache_readlink(const char *restrict pathname, char *restrict buf, size_t bufsize) {
+    struct readlink_cache *c = &readlink_cache;
+    uint32_t hash = naive_hash(pathname);
+
+    for (int i = c->used-1 ; i >= 0 ; i--) {
+        if (c->entry[i].hash == hash && strcmp(c->entry[i].name, pathname) == 0) {
+            ssize_t result = c->entry[i].result;
+            if (result < 0) {
+                errno = c->entry[i].saved_errno;
+            } else if (result > 0) {
+                if ((size_t)result > bufsize)
+                    result = bufsize;
+                memcpy(buf, c->entry[i].target, result);
+            }
+            return result;
+        }
+    }
+    ssize_t result = readlink(pathname, buf, bufsize);
+    int saved_errno = 0;
+    if (result < 0)
+        saved_errno = errno;
+    readlink_cache_add(pathname, result, saved_errno, buf);
+    return result;
+}
+
+// print the used cache entries to stdout (debugging aid)
+__attribute__((unused))
+static void readlink_cache_dump(void) {
+    printf("CACHE:\n");
+    struct readlink_cache *c = &readlink_cache;
+    printf(" slots: %d\n", c->slots);
+    printf(" used: %d\n", c->used);
+    printf(" entry: %p\n", (void *)c->entry);
+    // only the first c->used slots carry data. target is not zero-terminated
+    // and a negative precision would make "%.*s" read it as a string
+    for (int i=0 ; i < c->used ; i++) {
+        int target_len = c->entry[i].result > 0 ? (int)c->entry[i].result : 0;
+        printf(" entry[%d].name: %s\n", i, c->entry[i].name);
+        printf(" entry[%d].hash: %08x\n", i, c->entry[i].hash);
+        printf(" entry[%d].result: %zd\n", i, c->entry[i].result);
+        printf(" entry[%d].saved_errno: %d\n", i, c->entry[i].saved_errno);
+        printf(" entry[%d].target: %.*s\n", i, target_len, target_len ? c->entry[i].target : "");
+    }
+}
+
+/************* canondir cache ***********/
+
+struct cdir_cache_entry {
+    char *name;
+    uint32_t hash;
+    char *dst;          // NULL for equal src and destination string
+};
+
+static struct cdir_cache {
+    int slots;
+    int used;
+    struct cdir_cache_entry *entry;
+} cdir_cache;
+
+// returned by cdir_cache_try() when the directory is not in the cache
+#define CDIR_CACHE_MISS ((char *)-1)
+
+// release all memory held by the canondir cache
+__attribute__((unused))
+static void cdir_cache_free(void) {
+    struct cdir_cache *c = &cdir_cache;
+    for (int i=0 ; i < c->used ; i++) {
+        free(c->entry[i].name);
+        free(c->entry[i].dst);
+    }
+    c->slots = 0;
+    c->used = 0;
+    free(c->entry);
+    c->entry = NULL;
+}
+
+// look up the canonical form of directory path. Returns the cached string,
+// NULL if path is its own canonical form, or CDIR_CACHE_MISS if unknown
+static char *cdir_cache_try(const char *restrict path) {
+    struct cdir_cache *c = &cdir_cache;
+    uint32_t hash = naive_hash(path);
+
+    for (int i = c->used-1 ; i >= 0 ; i--) {
+        if (c->entry[i].hash == hash && strcmp(c->entry[i].name, path) == 0)
+            return c->entry[i].dst;
+    }
+    return CDIR_CACHE_MISS;
+}
+
+// remember that directory name canonicalizes to dst (strings are copied)
+static void cdir_cache_add(const char *name, const char *dst) {
+    struct cdir_cache *c = &cdir_cache;
+    if (c->slots == 0) {
+        c->slots = 200;
+        c->entry = zmalloc_nofail(200 * sizeof(*c->entry));
+    } else if (c->used >= c->slots) {
+        // double the table; zmalloc_nofail() already zeroed the new upper half
+        int new_slots = c->slots + c->slots;
+        struct cdir_cache_entry *new_entry = zmalloc_nofail(new_slots * sizeof(*new_entry));
+        memcpy(new_entry, c->entry, c->slots * sizeof(*new_entry));
+        c->slots = new_slots;
+        free(c->entry);
+        c->entry = new_entry;
+    }
+    c->entry[c->used].name = strdup_nofail(name);
+    c->entry[c->used].hash = naive_hash(name);
+    if ( strcmp(name, dst) != 0)
+        c->entry[c->used].dst = strdup_nofail(dst);
+    else
+        c->entry[c->used].dst = NULL;
+    c->used++;
+}
+
+/****************************************/
+
+// copy the directory part of the absolute path into outbuf and return
+// outbuf. Trailing '/' of path and of the directory part are not copied;
+// the directory part of a top level name is "/"
+static char *get_dirname(char *path, char *outbuf, ssize_t outbuf_len) {
+    char *p =path + strlen(path);
+    while (p > path + 1 && p[-1] == '/')
+        p--;
+    while (p > path + 1 && p[-1] != '/')
+        p--;
+    while (p > path + 1 && p[-1] == '/')
+        p--;
+    if (p - path + 1 > outbuf_len)
+        die ("get_dirname: output buffer to small\n");
+    memcpy(outbuf, path, p - path);
+    outbuf[p - path] = '\0';
+    return outbuf;
+}
+
+// copy the last component of the absolute path, without any trailing '/',
+// into outbuf and return outbuf
+static char *get_basename(char *path, char *outbuf, ssize_t outbuf_len) {
+    char *p = path + strlen(path);
+    int len = 0;
+    while (p > path + 1 && p[-1] == '/')
+        p--;
+    while (p > path + 1 && p[-1] != '/') {
+        p--;
+        len++;
+    }
+    if (len + 1 > outbuf_len)
+        die ("get_basename: output buffer to small\n");
+    memcpy(outbuf, p, len);
+    outbuf[len] = '\0';
+    return outbuf;
+}
+
+// true if the last component of path is a plain name: path does not end in
+// '/' and its last component is neither "." nor "..". Only for such paths
+// resolvedir() returns the canonical directory plus the unchanged last name
+static int has_plain_basename(const char *path) {
+    const char *base = path + strlen(path);
+    if (base == path || base[-1] == '/')
+        return 0;
+    while (base > path && base[-1] != '/')
+        base--;
+    return strcmp(base, ".") != 0 && strcmp(base, "..") != 0;
+}
+
+/*
+ * Canonicalize the absolute path patharg into outbuf and return outbuf.
+ *
+ * Redundant '/', "." and ".." components are removed and every component
+ * which is followed by a '/' is checked for being a symlink and replaced by
+ * its target. ".." never leaves the root directory and a trailing '/' is
+ * dropped from the result. Dies on relative paths, on results which do not
+ * fit into outbuf_len bytes and after MAX_SYMLINK_FOLLOWS symlink expansions.
+ *
+ * For paths with a plain last component the translation of the directory
+ * part is kept in the cdir cache and reused for later paths with the same
+ * directory part.
+ */
+static char *resolvedir(char *patharg, char *outbuf, size_t outbuf_len) {
+
+    if (outbuf_len < 2)     // need room for at least "/" and '\0'
+        die ("resolvedir: output buffer to small\n");
+
+    static char dirnamebuf[PATH_MAX];
+
+    char *path = patharg;   // path is the full absolute path we work on
+                            // when we follow a symlink, this will be changed
+                            // to point to a malloced() buffer
+
+    char *in = path;        // input pointer
+    char *out = outbuf;     // output pointer
+
+    if (*in != '/')
+        die ("resolvedir: relative paths unsupported\n");
+
+    char *origdir = NULL;   // directory part of original patharg malloced()
+                            // stays NULL if the result must not be cached
+
+    if (has_plain_basename(patharg)) {
+        // try the cdir cache with the full translation of the directory part
+        static char basenamebuf[PATH_MAX];
+        char *src = get_dirname(path, dirnamebuf, sizeof(dirnamebuf));
+        char *dst = cdir_cache_try(src);
+        if (dst == CDIR_CACHE_MISS) {
+            // not found in cache. keep the directory part we parsed out of the string
+            // so that we can add the translation to the cache later
+            origdir = strdup_nofail(src);
+        } else {
+            if (dst == NULL)
+                dst = src;  // NULL = negative cached (resolved dir = original dir)
+            char *base = get_basename(path, basenamebuf, sizeof(basenamebuf));
+            const char *sep = strcmp(dst, "/") == 0 ? "" : "/";
+            int n = snprintf(outbuf, outbuf_len, "%s%s%s", dst, sep, base);
+            if (n < 0 || (size_t)n >= outbuf_len)
+                die ("resolvedir: output buffer to small\n");
+            return outbuf;
+        }
+    }
+
+    in++;           // skip '/'
+    *out++ = '/';
+
+    int follows = 0;    // number of symlinks expanded so far
+
+    while(1) {
+        assert( out[-1] == '/' );
+
+        if (*in == '\0')
+            break;
+
+        char *start = in;
+
+        while (*in != '\0' && *in != '/')
+            in++;
+
+        if (in == start) { // / - ignore redundant '/'
+            ;
+        } else if (in == start+1 && start[0] == '.') { // ./ - ignore
+            ;
+        } else if (in == start+2 && start[0] == '.' && start[1] == '.') { // ../
+            // up one level - rewind output, but never above the root "/"
+            if (out > outbuf+1) {
+                out--;
+                while (out > outbuf+1 && out[-1] != '/')
+                    out--;
+            }
+        } else {
+            // copy component name, +1 for the '/' or '\0' following it
+            if ((size_t)(in - start) + 1 > (size_t)(outbuf + outbuf_len - out))
+                die("resolvedir: output buffer to small\n");
+            memcpy(out, start, in - start);
+            out += in - start;
+
+            // if this is the last component (the filename), do not check for symlinks
+            if (*in == '\0')
+                break;
+
+            // check for symlink
+
+            static char readlinkbuf[PATH_MAX]; // NOT zero-terminated
+
+            // make the output collected so far a zero-terminated string
+            *out = '\0';
+            ssize_t l = readlink_cache_readlink(outbuf, readlinkbuf, sizeof(readlinkbuf));
+            if (l == (ssize_t)sizeof(readlinkbuf))
+                die("%s: symlink target name to long.\n", outbuf);
+
+            if (l <= 0) {
+                // not a symlink
+                *out++ = '/';
+            } else {
+                // symlink
+                if (++follows > MAX_SYMLINK_FOLLOWS)
+                    die("%s: too many levels of symbolic links\n", patharg);
+
+                size_t restlen = strlen(in);
+                char *new_path;
+
+                if (readlinkbuf[0] == '/') {
+                    // absolute symlink, clear output, leave "/"
+                    out = outbuf+1;
+                    // in = target + rest
+                    new_path = malloc_nofail(l + restlen + 1);
+                    memcpy(new_path, readlinkbuf, l);
+                    strcpy(&new_path[l], in);
+                } else {
+                    // relative symlink, remove last component (the symlink name) from output, leave "/"
+                    while ( out > outbuf+1 && out[-1] != '/')
+                        out--;
+                    // in = "/" + target + rest
+                    new_path = malloc_nofail(1 + l + restlen + 1);
+                    new_path[0] = '/';
+                    memcpy(&new_path[1], readlinkbuf, l);
+                    strcpy(&new_path[l+1], in);
+                }
+                if (path != patharg)
+                    free(path);
+                path = new_path;
+                in = path;
+            }
+        }
+        if (*in == '\0')
+            break;
+        in++;
+    }
+    // drop the '/' left behind a directory component, "." or "..", but keep
+    // the single '/' of the root directory
+    if (out > outbuf+1 && out[-1] == '/')
+        out--;
+    *out = '\0';
+    if (path != patharg)
+        free(path);
+
+    if (origdir != NULL) {
+        // cache translation of original input directory to canonicalized output directory
+        char *translated_dir = get_dirname(outbuf, dirnamebuf, sizeof(dirnamebuf));
+        cdir_cache_add(origdir, translated_dir);
+        free(origdir);
+    }
+
+    return outbuf;
+}
+
+// run resolvedir() on in and print a warning to stdout if the result is not expect
+__attribute__((unused))
+static void _resolvedir_selftest(char *in, char *expect) {
+    char obuf[128];
+    resolvedir(in, obuf, sizeof(obuf));
+    if (strcmp(obuf, expect) != 0)
+        printf("WARNING: resolvedir_selftest: in '%s' expected '%s' got '%s'\n", in, expect, obuf);
+}
+
+// NOTE(review): the second group presumably expects /usr/tmp and /lib64 to be
+// symlinks on the machine the test is run on - confirm before relying on it
+__attribute__((unused))
+static void resolvedir_selftest(void) {
+    _resolvedir_selftest("/", "/");
+    _resolvedir_selftest("/file", "/file");
+    _resolvedir_selftest("/dir/", "/dir");
+    _resolvedir_selftest("/dir/file", "/dir/file");
+    _resolvedir_selftest("/dir////file", "/dir/file");
+    _resolvedir_selftest("/dir1/../dir2/file", "/dir2/file");
+    _resolvedir_selftest("/dir1/../dir2/dir3///", "/dir2/dir3");
+
+    _resolvedir_selftest("/usr/tmp/file", "/tmp/file");
+    _resolvedir_selftest("/usr/tmp/dir/file", "/tmp/dir/file");
+    _resolvedir_selftest("/lib64", "/lib64");
+    _resolvedir_selftest("/lib64/file", "/lib/file");
+    _resolvedir_selftest("/lib64/dir/file", "/lib/dir/file");
+}
+
+
+// skip the word at c and the whitespace after it
+static char *skipword(char *c) {
+    // <ctype.h> functions must be given an unsigned char value
+    while (*c != '\0' && !isspace((unsigned char)*c) )
+        c++;
+    while (*c != '\0' && isspace((unsigned char)*c) )
+        c++;
+    return c;
+}
+
+// skip n words
+static char *skipwords(char *c, int n) {
+    for (int i=0 ; i < n ; i++)
+        c = skipword(c);
+    return c;
+}
+
+// line buffer for getline(), shared by all input files
+static char *lbuf;
+static size_t lbuf_len;
+
+// read the inventory file line by line and print each line to stdout with
+// its path (everything after the seventh word) replaced by resolvedir()
+static void do_file(const char *restrict inventory) {
+
+    FILE *f = fopen(inventory, "r");
+    if (f == NULL)
+        die("%s: %m\n", inventory);
+
+    while (1) {
+        errno = 0;
+        if (getline(&lbuf, &lbuf_len, f) == -1)
+            break;
+        size_t len = strlen(lbuf);
+        if (len > 0 && lbuf[len-1] == '\n')
+            lbuf[len-1] = '\0';
+
+        char *p = skipwords(lbuf, 7);
+        if (*p == '\0') {
+            die("%s: format error. Line: '%s'\n", inventory, lbuf);
+        }
+        if (p > lbuf+1)
+            p[-1] = '\0';
+
+        static char resolvebuf[PATH_MAX];
+
+        char *resolved = resolvedir(p, resolvebuf, sizeof(resolvebuf));
+        printf("%s %s\n", lbuf, resolved);
+    }
+    if (errno)
+        die("%s: %m\n", inventory);
+
+    fclose(f);
+}
+
+// process the inventory files named on the command line, or stdin if there
+// are none
+int main(int argc, char **argv) {
+    if (argc==1) {
+        do_file("/proc/self/fd/0");
+    } else {
+        for (int i=1 ; i < argc ; i++) {
+            do_file(argv[i]);
+        }
+    }
+#ifndef NDEBUG
+    free(lbuf);
+    cdir_cache_free();
+    readlink_cache_free();
+#endif
+    return 0;
+}