From 809a3c6f422ae9c240dfabfe41f755297e2e5a3c Mon Sep 17 00:00:00 2001
From: Donald Buczek
Date: Wed, 15 Apr 2020 15:25:03 +0200
Subject: [PATCH] Add keywordset module

Add a utility module which can store a set of keywords. The set can be
created and updated from a string and can be serialized into a string.

The canonical string representation produced by this utility is the
space separated list of the sorted keywords in lexical order.

Input strings used to create or update a keyword set contain keywords
separated by whitespace. The keywords may be prefixed with "+" and "-".
For updates, first all unprefixed keywords are processed to create the
initial set, then updates are applied from the prefixed keywords.

An empty string used for updates is considered "no change", not "set to
empty set".

Usage example:

struct keywordset *kws = keywordset_new("xx yy");
keywordset_update(kws, "-yy +zz");         // remove yy, add zz
keywordset_update(kws, "aa bb");           // set to aa, bb
if (keywordset_ismember(kws, "bb")) ...    // true
char *s = keywordset_get(kws);             // s now "aa bb"
free(s);                                   // caller must free()
keywordset_purge(kws);                     // set is now empty
keywordset_free(kws);
---
 keywordset.c      | 185 ++++++++++++++++++++++++++++++++++++++++++++++
 keywordset.h      |  11 +++
 test_keywordset.c |  58 +++++++++++++++
 3 files changed, 254 insertions(+)
 create mode 100644 keywordset.c
 create mode 100644 keywordset.h
 create mode 100644 test_keywordset.c

diff --git a/keywordset.c b/keywordset.c
new file mode 100644
index 00000000..b95adb5c
--- /dev/null
+++ b/keywordset.c
@@ -0,0 +1,185 @@
+#include "keywordset.h"
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include "xmalloc.h"
+
+/* Initial capacity of the name array; expand() grows it from there. */
+#define KEYWORDSET_INITIAL_SLOTS (4-2)
+
+/* A set of keywords, kept as an unsorted array of heap allocated strings. */
+struct keywordset {
+    int nr_slots;   /* allocated entries in names */
+    int used;       /* entries of names currently in use */
+    char **names;   /* NUL-terminated keywords, freed by the set */
+};
+
+/*
+ * Return the index of the keyword which is equal to the first len bytes
+ * of name, or -1 if there is none. name does not need to be NUL-terminated
+ * at len. The stored keyword has to end after len bytes as well, so "ab"
+ * does not match a stored "abc" and "abc" does not match a stored "ab".
+ */
+static int find_name(struct keywordset *kws, char *name, size_t len) {
+    int i;
+    for ( i = 0; i < kws->used ; i++ ) {
+        if (strlen(kws->names[i]) == len && memcmp(kws->names[i], name, len) == 0)
+            return i;
+    }
+    return -1;
+}
+
+/* Grow the name array: 2 -> 6 -> 14 -> ... slots. */
+static void expand(struct keywordset *kws) {
+    int new_slots=(kws->nr_slots+2)*2-2;
+    kws->names=xrealloc(kws->names,new_slots*sizeof(*kws->names));
+    kws->nr_slots=new_slots;
+}
+
+/* Add the keyword name[0..len-1] unless it is already a member. */
+static void add_name(struct keywordset *kws, char *name, size_t len) {
+    if (find_name(kws, name, len) >= 0)
+        return;
+    if (kws->used == kws->nr_slots)
+        expand(kws);
+    kws->names[kws->used++] = xstrndup(name, len);
+}
+
+/* Remove the keyword name[0..len-1] if it is a member. */
+static void remove_name(struct keywordset *kws, char *name, size_t len) {
+    int i=find_name(kws, name, len);
+    if (i>=0) {
+        free(kws->names[i]);
+        /* close the gap; the order of the remaining names is kept */
+        memmove(&(kws->names[i]), &(kws->names[i+1]), (kws->used-i-1)*sizeof(*kws->names));
+        kws->used--;
+    }
+}
+
+/* Remove all keywords. The set itself stays allocated and can be reused. */
+void keywordset_purge(struct keywordset *kws) {
+    int i;
+    for ( i = 0 ; i < kws->used ; i++)
+        free(kws->names[i]);
+    kws->used = 0;
+}
+
+enum PHASE {
+    PHASE_SET,      /* process unprefixed keywords only */
+    PHASE_UPDATE,   /* process "+" and "-" prefixed keywords only */
+};
+
+/*
+ * Scan the whitespace separated words of input and apply those which
+ * belong to the given phase. In PHASE_SET the first unprefixed keyword
+ * purges the set before it is added, so input without unprefixed keywords
+ * leaves the current members alone.
+ */
+static void keywordset_update_phase(struct keywordset *kws, char *input, enum PHASE phase) {
+    char *c=input;
+    char *name_start;
+    char action;
+    int purged=0;
+    while (*c) {
+        /* <ctype.h> functions need a value representable as unsigned char */
+        while (*c && isspace((unsigned char)*c))
+            c++;
+
+        if (*c == '+' || *c == '-') {
+            action = *c;
+            c++;
+        } else {
+            action = ' ';
+        }
+
+        if (*c) {
+            name_start=c++;
+            while (*c && !isspace((unsigned char)*c))
+                c++;
+            if (phase == PHASE_SET && action==' ') {
+                if (!purged) {
+                    keywordset_purge(kws);
+                    purged = 1;
+                }
+                add_name(kws, name_start, c-name_start);
+            } else if (phase == PHASE_UPDATE) {
+                if (action == '+')
+                    add_name(kws, name_start, c-name_start);
+                else if (action == '-')
+                    remove_name(kws, name_start, c-name_start);
+            }
+        }
+    }
+}
+
+/*
+ * Update the set from input: unprefixed keywords, if there are any, replace
+ * the current members; afterwards "+name" adds and "-name" removes a
+ * keyword. Input which is empty or only whitespace changes nothing.
+ */
+void keywordset_update(struct keywordset *kws, char *input) {
+    keywordset_update_phase(kws, input, PHASE_SET);
+    keywordset_update_phase(kws, input, PHASE_UPDATE);
+}
+
+/*
+ * Create a new set, initialized from input unless input is NULL.
+ * Release it with keywordset_free().
+ */
+struct keywordset *keywordset_new(char *input) {
+    struct keywordset *kws = xmalloc(sizeof(*kws));
+    kws->nr_slots = KEYWORDSET_INITIAL_SLOTS;
+    kws->used = 0;
+    kws->names = xmalloc(KEYWORDSET_INITIAL_SLOTS*sizeof(*kws->names));
+    if (input)
+        keywordset_update(kws, input);
+    return kws;
+}
+
+/* qsort() comparison function for an array of char pointers */
+static int cmp(const void *a, const void *b) {
+    return strcmp(*(char **)a, *(char **)b);
+}
+
+/*
+ * Return the canonical string for the set: the keywords sorted with
+ * strcmp() and separated by single spaces. The caller has to free() it.
+ */
+char *keywordset_get(struct keywordset *kws) {
+    char **names=xmalloc(kws->used * sizeof(*names));
+    memcpy(names, kws->names, kws->used * sizeof(*names));
+    qsort(names, kws->used, sizeof(*names), cmp);
+    size_t len = 0;
+    int i;
+    for (i=0; i<kws->used; i++) {
+        len += strlen(names[i]);
+    }
+    size_t outlen = len + (kws->used >= 2 ? kws->used-1 : 0);
+    char *out=xmalloc(outlen + 1 );
+    char *p=out;
+    for ( i = 0 ; i < kws->used ; i++) {
+        p=stpcpy(p, names[i]);
+        *p++ = ' ';
+    }
+    /* terminate; this overwrites the separator after the last keyword */
+    out[outlen] = 0;
+    free(names);
+    return(out);
+}
+
+/* Return 1 if name is a member of the set, 0 otherwise. */
+int keywordset_ismember(struct keywordset *kws, char *name) {
+    if (find_name(kws, name, strlen(name)) >= 0)
+        return 1;
+    else
+        return 0;
+}
+
+/* Free the set and all its keywords. */
+void keywordset_free(struct keywordset *kws) {
+    int i;
+    for ( i = 0 ; i < kws->used ; i++)
+        free(kws->names[i]);
+    free(kws->names);
+    free(kws);
+}
diff --git a/keywordset.h b/keywordset.h
new file mode 100644
index 00000000..7484235b
--- /dev/null
+++ b/keywordset.h
@@ -0,0 +1,11 @@
+#ifndef _KEYWORDSET_H
+#define _KEYWORDSET_H
+
+struct keywordset *keywordset_new(char *input);
+void keywordset_update(struct keywordset *kws, char *input);
+char *keywordset_get(struct keywordset *kws);
+int keywordset_ismember(struct keywordset *kws, char *name);
+void keywordset_purge(struct keywordset *kws);
+void keywordset_free(struct keywordset *kws);
+
+#endif
diff --git a/test_keywordset.c b/test_keywordset.c
new file mode 100644
index 00000000..df6e8345
--- /dev/null
+++ b/test_keywordset.c
@@ -0,0 +1,58 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "keywordset.h"
+
+/* Create a set from init and compare its string form with expect. */
+static void test_new(char *init, char *expect) {
+    struct keywordset *kws = keywordset_new(init);
+    char *s = keywordset_get(kws);
+    if (strcmp(s, expect))
+        fprintf(stderr, "FAIL: new from '%s' got '%s' expected '%s'\n", init ? init : "(null)", s, expect);
+    free(s);
+    keywordset_free(kws);
+}
+
+/* Apply update to kws and compare the resulting string form with expect. */
+static void test_update(struct keywordset *kws, char *update, char *expect) {
+    char *init = keywordset_get(kws);
+    keywordset_update(kws, update);
+    char *s = keywordset_get(kws);
+    if (strcmp(s, expect))
+        fprintf(stderr, "FAIL: update '%s' with '%s' got '%s' expected '%s'\n", init, update, s, expect);
+    free(s);
+    free(init);
+}
+
+int main() {
+
+    test_new(NULL, "");
+    test_new("", "");
+    test_new("h7 h8 h9 h1 h2 h3", "h1 h2 h3 h7 h8 h9");
+    test_new(" h7\th8 h9 h1 h2 h3 \n", "h1 h2 h3 h7 h8 h9");
+
+    struct keywordset *kws = keywordset_new("a b c");
+
+    test_update(kws, "d e f", "d e f");
+    test_update(kws, "+g +h +i", "d e f g h i");
+    test_update(kws, "-e -h", "d f g i");
+    test_update(kws, "-i +x", "d f g x");
+    test_update(kws, "-x +m +n -n x y +a", "a m y");
+
+    test_update(kws, "-x +x", "a m x y");
+    test_update(kws, "+z -z", "a m x y");
+
+    keywordset_purge(kws);
+    test_update(kws, "", "");
+    keywordset_purge(kws);
+    test_update(kws, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+
+    /* a keyword must not match another keyword it is a prefix of */
+    keywordset_purge(kws);
+    test_update(kws, "abc", "abc");
+    test_update(kws, "+ab", "ab abc");
+    test_update(kws, "-ab", "abc");
+    test_update(kws, "-abcd", "abc");
+
+    keywordset_free(kws);
+}