Skip to content

Commit

Permalink
fn_escape: Do not use unicode regex
Browse files Browse the repository at this point in the history
The approach with the GLib regex is wrong here, because the pattern works
on Unicode characters, not on bytes, so we might match a multi-byte
character.

Do it manually and escape every byte which is not an ASCII graphic
character.
  • Loading branch information
donald committed Jun 5, 2022
1 parent c366c4e commit 1c04573
Showing 1 changed file with 8 additions and 17 deletions.
25 changes: 8 additions & 17 deletions cmirror.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <sys/utsname.h>
#include <utime.h>
#include <sys/time.h>
#include <ctype.h>

// https://www.manpagez.com/html/glib/glib-2.56.0/glib-Double-ended-Queues.php
// http://owww.molgen.mpg.de/~buczek/glib-doc/
Expand Down Expand Up @@ -82,25 +83,15 @@ static char **match_re(GRegex *regex, char *string) {
match_re(regex, string); \
})

/*
 * Replacement callback for g_regex_replace_eval(): appends the matched text
 * to `result` with every byte written as a "\xNN" hex escape.
 *
 * match_info: the current match; group 0 (the whole match) is escaped.
 * result:     string the escape sequences are appended to.
 * user_data:  unused.
 *
 * Returns FALSE so the replacement continues with the next match.
 */
static gboolean fn_escape_cb(const GMatchInfo *match_info, GString *result, gpointer user_data) {
    (void)user_data;
    g_autofree char *match = g_match_info_fetch(match_info, 0);
    assert(match && match[0] != '\0');
    /* A match may span several bytes (one multi-byte character), so escape
     * each byte on its own. Going through unsigned char keeps bytes >= 0x80
     * from being sign-extended into "\xffffffNN". */
    for (const char *p = match; *p; p++) {
        g_string_append_printf(result, "\\x%02x", (unsigned int)(unsigned char)*p);
    }
    return FALSE;
}

/*
 * Return an escaped copy of the file name `fn`.
 *
 * The string is processed byte by byte. Printable, non-space ASCII bytes
 * (0x21..0x7e) other than the backslash are copied unchanged; every other
 * byte, including each byte of a multi-byte character, is written as a
 * "\xNN" sequence with two lowercase hex digits. The backslash itself is
 * escaped so that a "\x" in the output always introduces an escape sequence.
 *
 * fn: NUL-terminated input string; it is not modified.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * release with g_free().
 */
static char *fn_escape(char *fn) {
    GString *out = g_string_sized_new(80);
    /* Walk the input as unsigned bytes: the range test is then well defined
     * for bytes >= 0x80 and does not depend on the current locale. */
    for (const unsigned char *p = (const unsigned char *)fn; *p; p++) {
        if (*p > 0x20 && *p < 0x7f && *p != '\\') {
            g_string_append_c(out, (char)*p);
        } else {
            g_string_append_printf(out, "\\x%02x", (unsigned int)*p);
        }
    }
    /* FALSE: free only the GString wrapper and hand the buffer to the caller. */
    return g_string_free(out, FALSE);
}

static gboolean fn_unescape_cb(const GMatchInfo *match_info, GString *result, gpointer user_data) {
Expand Down

0 comments on commit 1c04573

Please sign in to comment.