Skip to content

Commit

Permalink
Add keywordset module
Browse files Browse the repository at this point in the history
Add a utility module which can store a set of keywords. The set can be
created and updated from a string and can be serialized into a string.

The canonical string representation produced by this utility is the
space-separated list of the keywords, sorted in lexical order.

Input strings used to create or update a keyword set contain keywords
separated by whitespace. The keywords may be prefixed with "+" or "-".
For updates, first all unprefixed keywords are processed to create the
initial set, then updates are applied from the prefixed keywords.

An empty string used for updates is considered "no change", not
"set to empty set".

Usage example:

    struct keywordset *kws = keywordset_new("xx yy");
    keywordset_update(kws, "-yy +zz");  // remove yy, add zz
    keywordset_update(kws, "aa bb");    // set to aa, bb
    if (keywordset_ismember(kws, "bb")) ... // true
    char *s = keywordset_get(kws);   // s is now "aa bb"
    free(s);                         // caller must free()
    keywordset_purge(kws);           // set is now empty
    keywordset_free(kws);
  • Loading branch information
donald committed Apr 15, 2020
1 parent e629d62 commit 809a3c6
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 0 deletions.
162 changes: 162 additions & 0 deletions keywordset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#include "keywordset.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "xmalloc.h"

/* Number of slots allocated for the names array of a new set (4-2 = 2);
 * expand() grows the array from this starting point. */
#define KEYWORDSET_INITIAL_SLOTS (4-2)

/* A set of keywords, kept as an unsorted array of individually allocated strings. */
struct keywordset {
int nr_slots; /* number of slots allocated in names */
int used; /* number of slots in names currently holding a keyword */
char **names; /* names[0] .. names[used-1]: NUL-terminated keywords, each free()d separately */
};

static int find_name(struct keywordset *kws, char *name, size_t len) {
int i;
int j;
for ( i = 0; i < kws->used ; i++ ) {
j = 0;
while(1) {
if (kws->names[i][j] == 0)
break;
if (kws->names[i][j] != name[j])
break;
j++;
if (j==len)
return i;
}
}
return -1;
}

/*
 * Enlarge the names array of `kws`.
 *
 * The new capacity is (old + 2) * 2 - 2 slots; entries already stored are
 * carried over by xrealloc().
 */
static void expand(struct keywordset *kws) {
    const int grown = 2 * (kws->nr_slots + 2) - 2;

    kws->names = xrealloc(kws->names, grown * sizeof(*kws->names));
    kws->nr_slots = grown;
}

/*
 * Store a copy of the keyword `name[0..len)` in `kws`.
 *
 * If find_name() reports an existing entry, that entry's string is freed
 * and replaced by the fresh copy; otherwise the copy is appended, growing
 * the array first when every slot is in use.
 */
static void add_name(struct keywordset *kws, char *name, size_t len) {
    int slot = find_name(kws, name, len);

    if (slot < 0) {
        /* New keyword: take the next free slot, making room if needed. */
        if (kws->nr_slots == kws->used)
            expand(kws);
        slot = kws->used++;
    } else {
        /* Known keyword: release the old string before overwriting it. */
        free(kws->names[slot]);
    }
    kws->names[slot] = xstrndup(name, len);
}

/*
 * Delete the keyword `name[0..len)` from `kws`.
 *
 * Does nothing when find_name() does not find the keyword. Otherwise the
 * stored string is freed and the entries behind it move down one slot.
 */
static void remove_name(struct keywordset *kws, char *name, size_t len) {
    int pos = find_name(kws, name, len);
    if (pos < 0)
        return;

    char **slot = &kws->names[pos];
    free(*slot);
    kws->used--;
    /* Close the gap: shift the tail (possibly zero entries) down by one. */
    memmove(slot, slot + 1, (kws->used - pos) * sizeof(*slot));
}

/*
 * Remove every keyword from `kws`.
 *
 * All stored strings are freed and the set becomes empty; the names array
 * itself is kept, so the set can be filled again.
 */
void keywordset_purge(struct keywordset *kws) {
    char **entry = kws->names;
    int remaining = kws->used;

    while (remaining-- > 0)
        free(*entry++);
    kws->used = 0;
}

enum PHASE {
PHASE_SET,
PHASE_UPDATE,
};

/*
 * Run one pass over `input` and apply the tokens that belong to `phase`.
 *
 * `input` is split at whitespace. A token may start with '+' or '-'; the
 * rest of the token is the keyword.
 *
 *   PHASE_SET:    only unprefixed keywords are used. The first one purges
 *                 the set, then each of them is added. If there is no
 *                 unprefixed keyword the set is left untouched.
 *   PHASE_UPDATE: only prefixed keywords are used; "+kw" adds kw and
 *                 "-kw" removes it.
 *
 * A '+' or '-' that is not directly followed by a keyword is ignored.
 */
static void keywordset_update_phase(struct keywordset *kws, char *input, enum PHASE phase) {
    char *c = input;
    int purged = 0;

    while (*c) {
        /* The <ctype.h> functions need an unsigned char value; passing a
         * negative plain char is undefined behaviour. */
        while (*c && isspace((unsigned char)*c))
            c++;

        char action = ' ';
        if (*c == '+' || *c == '-')
            action = *c++;

        char *name_start = c;
        while (*c && !isspace((unsigned char)*c))
            c++;

        size_t len = (size_t)(c - name_start);
        if (len == 0) {
            /* End of input, or a bare prefix. Without this check the
             * whitespace after the prefix would become part of a keyword. */
            continue;
        }

        if (phase == PHASE_SET) {
            if (action != ' ')
                continue;
            if (!purged) {
                keywordset_purge(kws);
                purged = 1;
            }
            add_name(kws, name_start, len);
        } else if (phase == PHASE_UPDATE) {
            if (action == '+')
                add_name(kws, name_start, len);
            else if (action == '-')
                remove_name(kws, name_start, len);
        }
    }
}

/*
 * Update `kws` from the whitespace-separated keyword list in `input`.
 *
 * Two passes are made over the same string: first the unprefixed keywords
 * (if any) replace the current content, then the "+"/"-" prefixed keywords
 * are added or removed. A NULL `input` is treated as no change.
 */
void keywordset_update(struct keywordset *kws, char *input) {
    if (!input)
        return;

    keywordset_update_phase(kws, input, PHASE_SET);
    keywordset_update_phase(kws, input, PHASE_UPDATE);
}

/*
 * Allocate a keyword set.
 *
 * The set starts empty with KEYWORDSET_INITIAL_SLOTS slots. When `input`
 * is not NULL it is applied with keywordset_update().
 *
 * Returns the new set; dispose of it with keywordset_free().
 */
struct keywordset *keywordset_new(char *input) {
    struct keywordset *set = xmalloc(sizeof(*set));

    set->names = xmalloc(KEYWORDSET_INITIAL_SLOTS * sizeof(*set->names));
    set->nr_slots = KEYWORDSET_INITIAL_SLOTS;
    set->used = 0;

    if (input != NULL)
        keywordset_update(set, input);
    return set;
}

/*
 * qsort() comparison callback for an array of C strings.
 *
 * `a` and `b` point at array elements (each a pointer to a NUL-terminated
 * string). Returns the strcmp() ordering of the two strings. The casts
 * keep the const qualification that qsort() puts on the elements.
 */
static int cmp(const void *a, const void *b) {
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    return strcmp(lhs, rhs);
}

/*
 * Serialize `kws` into its canonical string form: all keywords sorted with
 * strcmp() and joined by single spaces. An empty set yields "".
 *
 * The set itself is not modified; sorting is done on a temporary copy of
 * the pointer array.
 *
 * Returns a newly allocated string which the caller must free().
 */
char *keywordset_get(struct keywordset *kws) {
    if (kws->used <= 0) {
        /* Avoid zero-sized allocations and qsort()/memcpy() on them. */
        char *empty = xmalloc(1);
        empty[0] = '\0';
        return empty;
    }

    size_t count = (size_t)kws->used;
    char **sorted = xmalloc(count * sizeof(*sorted));
    memcpy(sorted, kws->names, count * sizeof(*sorted));
    qsort(sorted, count, sizeof(*sorted), cmp);

    /* One separator between each pair of neighbours, plus the keywords. */
    size_t outlen = count - 1;
    for (size_t i = 0; i < count; i++)
        outlen += strlen(sorted[i]);

    char *out = xmalloc(outlen + 1);
    char *p = out;
    for (size_t i = 0; i < count; i++) {
        size_t n = strlen(sorted[i]);

        if (i > 0)
            *p++ = ' ';
        /* memcpy instead of stpcpy: the latter is POSIX, not ISO C. */
        memcpy(p, sorted[i], n);
        p += n;
    }
    *p = '\0';

    free(sorted);
    return out;
}

/*
 * Test whether `name` (a NUL-terminated string) is found in `kws` by
 * find_name(). Returns 1 if it is, 0 if it is not.
 */
int keywordset_ismember(struct keywordset *kws, char *name) {
    int idx = find_name(kws, name, strlen(name));

    return idx < 0 ? 0 : 1;
}

/*
 * Destroy `kws`: free every stored keyword, the names array and the set
 * structure itself. `kws` must not be used afterwards.
 */
void keywordset_free(struct keywordset *kws) {
    /* keywordset_purge() releases the individual keyword strings. */
    keywordset_purge(kws);
    free(kws->names);
    free(kws);
}
11 changes: 11 additions & 0 deletions keywordset.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
 * keywordset - a set of keywords that can be created and updated from a
 * string and serialized back into a string.
 *
 * Input strings contain keywords separated by whitespace. A keyword may be
 * prefixed with "+" (add) or "-" (remove). On update, the unprefixed
 * keywords, if there are any, first replace the current set; afterwards
 * the prefixed keywords are applied.
 *
 * Note: the include guard deliberately has no leading underscore, because
 * identifiers starting with an underscore and an uppercase letter are
 * reserved for the implementation.
 */
#ifndef KEYWORDSET_H
#define KEYWORDSET_H

struct keywordset;

/* Create a set. `input` may be NULL for an empty set, otherwise it is
 * applied as by keywordset_update(). Release with keywordset_free(). */
struct keywordset *keywordset_new(char *input);

/* Apply the keyword list in `input` to `kws` (see the rules above). An
 * empty string leaves the set unchanged. */
void keywordset_update(struct keywordset *kws, char *input);

/* Return the keywords sorted and separated by single spaces, as a newly
 * allocated string. The caller must free() it. */
char *keywordset_get(struct keywordset *kws);

/* Return 1 if `name` is in the set, 0 otherwise. */
int keywordset_ismember(struct keywordset *kws, char *name);

/* Remove all keywords; the set stays valid and can be reused. */
void keywordset_purge(struct keywordset *kws);

/* Free the set and everything it owns. */
void keywordset_free(struct keywordset *kws);

#endif
50 changes: 50 additions & 0 deletions test_keywordset.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include <assert.h>
#include <string.h>
#include "keywordset.h"
#include <stdlib.h>
#include <stdio.h>

/*
 * Build a set from `init` (which may be NULL) and check that its string
 * form equals `expect`. A mismatch is reported on stderr.
 */
static void test_new(char *init, char *expect) {
    struct keywordset *kws = keywordset_new(init);
    char *s = keywordset_get(kws);

    if (strcmp(s, expect) != 0) {
        /* Passing NULL for %s is undefined, so substitute a marker. */
        fprintf(stderr, "FAIL: new from '%s' got '%s' expected '%s'\n",
                init ? init : "(null)", s, expect);
    }
    free(s);
    keywordset_free(kws);
}

/*
 * Apply `update` to `kws` and check that the resulting string form equals
 * `expect`. On mismatch the state before the update, the update string,
 * the actual result and the expected result are reported on stderr.
 */
static void test_update(struct keywordset *kws, char *update, char *expect) {
    char *before = keywordset_get(kws);

    keywordset_update(kws, update);

    char *after = keywordset_get(kws);
    int mismatch = strcmp(after, expect) != 0;
    if (mismatch) {
        fprintf(stderr, "FAIL: update '%s' with '%s' got '%s' expected '%s'\n", before, update, after, expect);
    }
    free(after);
    free(before);
}

/*
 * Test driver: runs the construction cases, then a chain of updates on a
 * single set (each case starts from the state the previous one left), and
 * finally two cases on a purged set.
 */
int main() {
    /* { init, expected string form } */
    static char *const new_cases[][2] = {
        { NULL, "" },
        { "", "" },
        { "h7 h8 h9 h1 h2 h3", "h1 h2 h3 h7 h8 h9" },
        { " h7\th8 h9 h1 h2 h3 \n", "h1 h2 h3 h7 h8 h9" },
    };
    /* { update, expected string form afterwards } -- order matters */
    static char *const update_cases[][2] = {
        { "d e f", "d e f" },
        { "+g +h +i", "d e f g h i" },
        { "-e -h", "d f g i" },
        { "-i +x", "d f g x" },
        { "-x +m +n -n x y +a", "a m y" },
        { "-x +x", "a m x y" },
        { "+z -z", "a m x y" },
    };
    size_t i;

    for (i = 0; i < sizeof(new_cases) / sizeof(new_cases[0]); i++)
        test_new(new_cases[i][0], new_cases[i][1]);

    struct keywordset *kws = keywordset_new("a b c");

    for (i = 0; i < sizeof(update_cases) / sizeof(update_cases[0]); i++)
        test_update(kws, update_cases[i][0], update_cases[i][1]);

    /* An empty update string must leave a purged set empty. */
    keywordset_purge(kws);
    test_update(kws, "", "");

    /* A single long keyword on a purged set. */
    keywordset_purge(kws);
    test_update(kws, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");

    keywordset_free(kws);
}

0 comments on commit 809a3c6

Please sign in to comment.