Skip to content

Commit

Permalink
grep: teach --debug option to dump the parse tree
Browse files Browse the repository at this point in the history
Our "grep" allows complex boolean expressions to be formed to match
each individual line with operators like --and, '(', ')' and --not.
Introduce the "--debug" option to show the parse tree to help people
who want to debug and enhance it.

Also "log" learns "--grep-debug" option to do the same.  The command
line parser to the log family is a lot more limited than the general
"git grep" parser, but it has special handling for header matching
(e.g. "--author"), and a parse tree is valuable when working on it.

Note that "--all-match" is *not* any individual node in the parse
tree.  It is an instruction to the evaluator to check all the nodes
in the top-level backbone have matched and reject a document as
non-matching otherwise.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
Junio C Hamano committed Sep 14, 2012
1 parent 785ee49 commit 17bf35a
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 2 deletions.
3 changes: 3 additions & 0 deletions builtin/grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -772,6 +772,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
"indicate hit with exit status without output"),
OPT_BOOLEAN(0, "all-match", &opt.all_match,
"show only matches from files that match all patterns"),
{ OPTION_SET_INT, 0, "debug", &opt.debug, NULL,
"show parse tree for grep expression",
PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, NULL, 1 },
OPT_GROUP(""),
{ OPTION_STRING, 'O', "open-files-in-pager", &show_in_pager,
"pager", "show matching files in the pager",
Expand Down
92 changes: 90 additions & 2 deletions grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,87 @@ static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
return compile_pattern_or(list);
}

static void indent(int in)
{
while (in-- > 0)
fputc(' ', stderr);
}

static void dump_grep_pat(struct grep_pat *p)
{
switch (p->token) {
case GREP_AND: fprintf(stderr, "*and*"); break;
case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
case GREP_NOT: fprintf(stderr, "*not*"); break;
case GREP_OR: fprintf(stderr, "*or*"); break;

case GREP_PATTERN: fprintf(stderr, "pattern"); break;
case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
}

switch (p->token) {
default: break;
case GREP_PATTERN_HEAD:
fprintf(stderr, "<head %d>", p->field); break;
case GREP_PATTERN_BODY:
fprintf(stderr, "<body>"); break;
}
switch (p->token) {
default: break;
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
case GREP_PATTERN:
fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
break;
}
fputc('\n', stderr);
}

static void dump_grep_expression_1(struct grep_expr *x, int in)
{
indent(in);
switch (x->node) {
case GREP_NODE_TRUE:
fprintf(stderr, "true\n");
break;
case GREP_NODE_ATOM:
dump_grep_pat(x->u.atom);
break;
case GREP_NODE_NOT:
fprintf(stderr, "(not\n");
dump_grep_expression_1(x->u.unary, in+1);
indent(in);
fprintf(stderr, ")\n");
break;
case GREP_NODE_AND:
fprintf(stderr, "(and\n");
dump_grep_expression_1(x->u.binary.left, in+1);
dump_grep_expression_1(x->u.binary.right, in+1);
indent(in);
fprintf(stderr, ")\n");
break;
case GREP_NODE_OR:
fprintf(stderr, "(or\n");
dump_grep_expression_1(x->u.binary.left, in+1);
dump_grep_expression_1(x->u.binary.right, in+1);
indent(in);
fprintf(stderr, ")\n");
break;
}
}

void dump_grep_expression(struct grep_opt *opt)
{
struct grep_expr *x = opt->pattern_expression;

if (opt->all_match)
fprintf(stderr, "[all-match]\n");
dump_grep_expression_1(x, 0);
fflush(NULL);
}

static struct grep_expr *grep_true_expr(void)
{
struct grep_expr *z = xcalloc(1, sizeof(*z));
Expand Down Expand Up @@ -395,7 +476,7 @@ static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
return header_expr;
}

void compile_grep_patterns(struct grep_opt *opt)
static void compile_grep_patterns_real(struct grep_opt *opt)
{
struct grep_pat *p;
struct grep_expr *header_expr = prep_header_patterns(opt);
Expand All @@ -415,7 +496,7 @@ void compile_grep_patterns(struct grep_opt *opt)

if (opt->all_match || header_expr)
opt->extended = 1;
else if (!opt->extended)
else if (!opt->extended && !opt->debug)
return;

p = opt->pattern_list;
Expand All @@ -435,6 +516,13 @@ void compile_grep_patterns(struct grep_opt *opt)
opt->all_match = 1;
}

void compile_grep_patterns(struct grep_opt *opt)
{
compile_grep_patterns_real(opt);
if (opt->debug)
dump_grep_expression(opt);
}

static void free_pattern_expr(struct grep_expr *x)
{
switch (x->node) {
Expand Down
1 change: 1 addition & 0 deletions grep.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ struct grep_opt {
int word_regexp;
int fixed;
int all_match;
int debug;
#define GREP_BINARY_DEFAULT 0
#define GREP_BINARY_NOMATCH 1
#define GREP_BINARY_TEXT 2
Expand Down
2 changes: 2 additions & 0 deletions revision.c
Original file line number Diff line number Diff line change
Expand Up @@ -1578,6 +1578,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
} else if ((argcount = parse_long_opt("grep", argv, &optarg))) {
add_message_grep(revs, optarg);
return argcount;
} else if (!strcmp(arg, "--grep-debug")) {
revs->grep_filter.debug = 1;
} else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) {
revs->grep_filter.regflags |= REG_EXTENDED;
} else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
Expand Down

0 comments on commit 17bf35a

Please sign in to comment.