Skip to content

Commit

Permalink
color-words: take an optional regular expression describing words
Browse files Browse the repository at this point in the history
In some applications, words are not delimited by white space.  To
allow for that, you can specify a regular expression describing
what makes a word with

	git diff --color-words='[A-Za-z0-9]+'

Note that words cannot contain newline characters.

As suggested by Thomas Rast, the words are the exact matches of the
regular expression.

Note that a regular expression beginning with a '^' will match only
a word at the beginning of the hunk, not a word at the beginning of
a line, and is probably not what you want.

This commit contains a quoting fix by Thomas Rast.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
Johannes Schindelin authored and Junio C Hamano committed Jan 17, 2009
1 parent 2e5d200 commit 2b6a541
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 10 deletions.
6 changes: 5 additions & 1 deletion Documentation/diff-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,12 @@ endif::git-format-patch[]
Turn off colored diff, even when the configuration file
gives the default to color output.

--color-words::
--color-words[=regex]::
Show colored word diff, i.e. color words which have changed.
+
Optionally, you can pass a regular expression that tells Git what the
words are that you are looking for; the default is to interpret any
stretch of non-whitespace as a word.

--no-renames::
Turn off rename detection, even when the configuration
Expand Down
64 changes: 55 additions & 9 deletions diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -333,12 +333,14 @@ static void diff_words_append(char *line, unsigned long len,
len--;
memcpy(buffer->text.ptr + buffer->text.size, line, len);
buffer->text.size += len;
buffer->text.ptr[buffer->text.size] = '\0';
}

/*
 * Per-diff state for the word-diff (--color-words) output; allocated
 * zeroed with xcalloc() in builtin_diff().
 */
struct diff_words_data {
/* accumulated text of the two sides; presumably removed vs. added lines */
struct diff_words_buffer minus, plus;
/* position in the plus text; advanced to plus_end by fn_out_diff_words_aux() */
const char *current_plus;
/* output stream, copied from the diff options' file */
FILE *file;
/*
 * Compiled form of the user-supplied word regex, or NULL when none was
 * given (in which case words are runs of non-whitespace).
 */
regex_t *word_regex;
};

static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
Expand Down Expand Up @@ -382,29 +384,58 @@ static void fn_out_diff_words_aux(void *priv, char *line, unsigned long len)
diff_words->current_plus = plus_end;
}

/*
 * Locate the next word in buffer, scanning forward from offset *begin.
 *
 * buffer     - text to scan; when word_regex is given, buffer->ptr must be
 *              NUL-terminated because regexec() operates on C strings.
 * word_regex - compiled regex whose matches are the words, or NULL to
 *              treat every run of non-whitespace as a word.
 * begin      - in: offset to start looking at; out: offset of the word.
 * end        - out: offset one past the last byte of the word.
 *
 * Returns 0 iff a word was found (then *begin < *end); non-zero otherwise.
 *
 * A word never contains a newline: a regex match is cut at its first
 * newline.  A match that is empty (or that starts at a newline, and is
 * therefore empty after cutting) is not a word; it is skipped by advancing
 * one byte and searching again, so that such a match does not hide the
 * words that follow it.
 */
static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
		int *begin, int *end)
{
	while (word_regex && *begin < buffer->size) {
		regmatch_t match[1];
		char *word, *newline;

		if (regexec(word_regex, buffer->ptr + *begin, 1, match, 0))
			return -1;	/* no further match in the rest of the text */

		/* match offsets are relative to buffer->ptr + *begin */
		word = buffer->ptr + *begin + match[0].rm_so;
		newline = memchr(word, '\n', match[0].rm_eo - match[0].rm_so);
		*end = newline ? newline - buffer->ptr : match[0].rm_eo + *begin;
		*begin += match[0].rm_so;
		if (*begin < *end)
			return 0;

		/* zero-length word: step over one byte and try again */
		(*begin)++;
	}

	/*
	 * No regex (or nothing left to scan): fall back to
	 * whitespace-delimited words.  First skip leading whitespace...
	 */
	while (*begin < buffer->size && isspace(buffer->ptr[*begin]))
		(*begin)++;
	if (*begin >= buffer->size)
		return -1;

	/* ...then extend the word up to the next whitespace or end of text */
	*end = *begin + 1;
	while (*end < buffer->size && !isspace(buffer->ptr[*end]))
		(*end)++;

	return 0;
}

/*
* This function splits the words in buffer->text, stores the list with
* newline separator into out, and saves the offsets of the original words
* in buffer->orig.
*/
static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out,
regex_t *word_regex)
{
int i, j;
long alloc = 0;

out->size = 0;
out->ptr = xmalloc(buffer->text.size);
out->ptr = NULL;

/* fake an empty "0th" word */
ALLOC_GROW(buffer->orig, 1, buffer->orig_alloc);
buffer->orig[0].begin = buffer->orig[0].end = buffer->text.ptr;
buffer->orig_nr = 1;

for (i = 0; i < buffer->text.size; i++) {
if (isspace(buffer->text.ptr[i]))
continue;
for (j = i + 1; j < buffer->text.size &&
!isspace(buffer->text.ptr[j]); j++)
; /* find the end of the word */
if (find_word_boundaries(&buffer->text, word_regex, &i, &j))
return;

/* store original boundaries */
ALLOC_GROW(buffer->orig, buffer->orig_nr + 1,
Expand All @@ -414,6 +445,7 @@ static void diff_words_fill(struct diff_words_buffer *buffer, mmfile_t *out)
buffer->orig_nr++;

/* store one word */
ALLOC_GROW(out->ptr, out->size + j - i + 1, alloc);
memcpy(out->ptr + out->size, buffer->text.ptr + i, j - i);
out->ptr[out->size + j - i] = '\n';
out->size += j - i + 1;
Expand Down Expand Up @@ -443,9 +475,10 @@ static void diff_words_show(struct diff_words_data *diff_words)

memset(&xpp, 0, sizeof(xpp));
memset(&xecfg, 0, sizeof(xecfg));
diff_words_fill(&diff_words->minus, &minus);
diff_words_fill(&diff_words->plus, &plus);
diff_words_fill(&diff_words->minus, &minus, diff_words->word_regex);
diff_words_fill(&diff_words->plus, &plus, diff_words->word_regex);
xpp.flags = XDF_NEED_MINIMAL;
/* as only the hunk header will be parsed, we need a 0-context */
xecfg.ctxlen = 0;
xdi_diff_outf(&minus, &plus, fn_out_diff_words_aux, diff_words,
&xpp, &xecfg, &ecb);
Expand Down Expand Up @@ -484,6 +517,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata)
free (ecbdata->diff_words->minus.orig);
free (ecbdata->diff_words->plus.text.ptr);
free (ecbdata->diff_words->plus.orig);
free(ecbdata->diff_words->word_regex);
free(ecbdata->diff_words);
ecbdata->diff_words = NULL;
}
Expand Down Expand Up @@ -1506,6 +1540,14 @@ static void builtin_diff(const char *name_a,
ecbdata.diff_words =
xcalloc(1, sizeof(struct diff_words_data));
ecbdata.diff_words->file = o->file;
if (o->word_regex) {
ecbdata.diff_words->word_regex = (regex_t *)
xmalloc(sizeof(regex_t));
if (regcomp(ecbdata.diff_words->word_regex,
o->word_regex, REG_EXTENDED))
die ("Invalid regular expression: %s",
o->word_regex);
}
}
xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata,
&xpp, &xecfg, &ecb);
Expand Down Expand Up @@ -2517,6 +2559,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
DIFF_OPT_CLR(options, COLOR_DIFF);
else if (!strcmp(arg, "--color-words"))
options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
else if (!prefixcmp(arg, "--color-words=")) {
options->flags |= DIFF_OPT_COLOR_DIFF | DIFF_OPT_COLOR_DIFF_WORDS;
options->word_regex = arg + 14;
}
else if (!strcmp(arg, "--exit-code"))
DIFF_OPT_SET(options, EXIT_WITH_STATUS);
else if (!strcmp(arg, "--quiet"))
Expand Down
1 change: 1 addition & 0 deletions diff.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ struct diff_options {

int stat_width;
int stat_name_width;
const char *word_regex;

/* this is set by diffcore for DIFF_FORMAT_PATCH */
int found_changes;
Expand Down
57 changes: 57 additions & 0 deletions t/t4034-diff-words.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,61 @@ test_expect_success 'word diff with runs of whitespace' '
'

# Expected output for --color-words="[a-z]+", i.e. when only runs of
# lowercase letters count as words.  The pre/post fixtures are not written
# here; presumably they are left over from earlier in this script.
cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index 330b04f..5ed8eff 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1,3 +1,7 @@<RESET>
h(4),<GREEN>hh<RESET>[44]
<RESET>
a = b + c<RESET>
<GREEN>aa = a<RESET>
<GREEN>aeff = aeff * ( aaa<RESET> )
EOF

# NOTE(review): word_diff is a helper defined outside this excerpt;
# presumably it runs the diff with the given option and compares the
# result against ./expect -- confirm against its definition.
test_expect_success 'word diff with a regular expression' '
word_diff --color-words="[a-z]+"
'

# A word "aaa" is appended at the end of the only line; with the regex
# "a+" the expectation is that just that new word is shown in green.
echo 'aaa (aaa)' > pre
echo 'aaa (aaa) aaa' > post

cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index c29453b..be22f37 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1 +1 @@<RESET>
aaa (aaa) <GREEN>aaa<RESET>
EOF

test_expect_success 'test parsing words for newline' '
word_diff --color-words="a+"
'

# The regex "." makes each single character a word; the ":" at the end of
# the line is removed and is expected to be shown in red.
echo '(:' > pre
echo '(' > post

cat > expect <<\EOF
<WHITE>diff --git a/pre b/post<RESET>
<WHITE>index 289cb9d..2d06f37 100644<RESET>
<WHITE>--- a/pre<RESET>
<WHITE>+++ b/post<RESET>
<BROWN>@@ -1 +1 @@<RESET>
(<RED>:<RESET>
EOF

test_expect_success 'test when words are only removed at the end' '
word_diff --color-words=.
'

test_done

0 comments on commit 2b6a541

Please sign in to comment.