Skip to content

Commit

Permalink
Merge branch 'mk/grep-pcre'
Browse files Browse the repository at this point in the history
* mk/grep-pcre:
  git-grep: Fix problems with recently added tests
  git-grep: Update tests (mainly for -P)
  Makefile: Pass USE_LIBPCRE down in GIT-BUILD-OPTIONS
  git-grep: update tests now regexp type is "last one wins"
  git-grep: do not die upon -F/-P when grep.extendedRegexp is set.
  git-grep: Bail out when -P is used with -F or -E
  grep: Add basic tests
  configure: Check for libpcre
  git-grep: Learn PCRE
  grep: Extract compile_regexp_failed() from compile_regexp()
  grep: Fix a typo in a comment
  grep: Put calls to fixmatch() and regmatch() into patmatch()
  contrib/completion: --line-number to git grep
  Documentation: Add --line-number to git-grep synopsis
  • Loading branch information
Junio C Hamano committed May 30, 2011
2 parents 3d109dd + d0042ab commit be653d6
Show file tree
Hide file tree
Showing 11 changed files with 347 additions and 30 deletions.
8 changes: 7 additions & 1 deletion Documentation/git-grep.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ SYNOPSIS
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp]
[-F | --fixed-strings] [-n]
[-P | --perl-regexp]
[-F | --fixed-strings] [-n | --line-number]
[-l | --files-with-matches] [-L | --files-without-match]
[(-O | --open-files-in-pager) [<pager>]]
[-z | --null]
Expand Down Expand Up @@ -97,6 +98,11 @@ OPTIONS
Use POSIX extended/basic regexp for patterns. Default
is to use basic regexp.

-P::
--perl-regexp::
Use Perl-compatible regexp for patterns. Requires libpcre to be
compiled in.

-F::
--fixed-strings::
Use fixed strings for patterns (don't interpret pattern
Expand Down
16 changes: 16 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ all::
# Define NO_OPENSSL environment variable if you do not have OpenSSL.
# This also implies BLK_SHA1.
#
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
#
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
Expand Down Expand Up @@ -1258,6 +1264,15 @@ ifdef NO_LIBGEN_H
COMPAT_OBJS += compat/basename.o
endif

ifdef USE_LIBPCRE
BASIC_CFLAGS += -DUSE_LIBPCRE
ifdef LIBPCREDIR
BASIC_CFLAGS += -I$(LIBPCREDIR)/include
EXTLIBS += -L$(LIBPCREDIR)/$(lib) $(CC_LD_DYNPATH)$(LIBPCREDIR)/$(lib)
endif
EXTLIBS += -lpcre
endif

ifdef NO_CURL
BASIC_CFLAGS += -DNO_CURL
REMOTE_CURL_PRIMARY =
Expand Down Expand Up @@ -2089,6 +2104,7 @@ GIT-BUILD-OPTIONS: FORCE
@echo PYTHON_PATH=\''$(subst ','\'',$(PYTHON_PATH_SQ))'\' >>$@
@echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@
@echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@
@echo USE_LIBPCRE=\''$(subst ','\'',$(subst ','\'',$(USE_LIBPCRE)))'\' >>$@
@echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@
@echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@
ifdef GIT_TEST_CMP
Expand Down
52 changes: 43 additions & 9 deletions builtin/grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,15 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
int i;
int dummy;
int use_index = 1;
enum {
pattern_type_unspecified = 0,
pattern_type_bre,
pattern_type_ere,
pattern_type_fixed,
pattern_type_pcre,
};
int pattern_type = pattern_type_unspecified;

struct option options[] = {
OPT_BOOLEAN(0, "cached", &cached,
"search in index instead of in the work tree"),
Expand All @@ -774,13 +783,18 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
"descend at most <depth> levels", PARSE_OPT_NONEG,
NULL, 1 },
OPT_GROUP(""),
OPT_BIT('E', "extended-regexp", &opt.regflags,
"use extended POSIX regular expressions", REG_EXTENDED),
OPT_NEGBIT('G', "basic-regexp", &opt.regflags,
"use basic POSIX regular expressions (default)",
REG_EXTENDED),
OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
"interpret patterns as fixed strings"),
OPT_SET_INT('E', "extended-regexp", &pattern_type,
"use extended POSIX regular expressions",
pattern_type_ere),
OPT_SET_INT('G', "basic-regexp", &pattern_type,
"use basic POSIX regular expressions (default)",
pattern_type_bre),
OPT_SET_INT('F', "fixed-strings", &pattern_type,
"interpret patterns as fixed strings",
pattern_type_fixed),
OPT_SET_INT('P', "perl-regexp", &pattern_type,
"use Perl-compatible regular expressions",
pattern_type_pcre),
OPT_GROUP(""),
OPT_BOOLEAN('n', "line-number", &opt.linenum, "show line numbers"),
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
Expand Down Expand Up @@ -886,6 +900,28 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
PARSE_OPT_KEEP_DASHDASH |
PARSE_OPT_STOP_AT_NON_OPTION |
PARSE_OPT_NO_INTERNAL_HELP);
switch (pattern_type) {
case pattern_type_fixed:
opt.fixed = 1;
opt.pcre = 0;
break;
case pattern_type_bre:
opt.fixed = 0;
opt.pcre = 0;
opt.regflags &= ~REG_EXTENDED;
break;
case pattern_type_ere:
opt.fixed = 0;
opt.pcre = 0;
opt.regflags |= REG_EXTENDED;
break;
case pattern_type_pcre:
opt.fixed = 0;
opt.pcre = 1;
break;
default:
break; /* nothing */
}

if (use_index && !startup_info->have_repository)
/* die the same way as if we did it at the beginning */
Expand Down Expand Up @@ -925,8 +961,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
die(_("no pattern given."));
if (!opt.fixed && opt.ignore_case)
opt.regflags |= REG_ICASE;
if ((opt.regflags != REG_NEWLINE) && opt.fixed)
die(_("cannot mix --fixed-strings and regexp"));

#ifndef NO_PTHREADS
if (online_cpus() == 1 || !grep_threads_ok(&opt))
Expand Down
1 change: 1 addition & 0 deletions config.mak.in
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ NO_INET_PTON=@NO_INET_PTON@
NO_ICONV=@NO_ICONV@
OLD_ICONV=@OLD_ICONV@
NO_REGEX=@NO_REGEX@
USE_LIBPCRE=@USE_LIBPCRE@
NO_DEFLATE_BOUND=@NO_DEFLATE_BOUND@
INLINE=@INLINE@
SOCKLEN_T=@SOCKLEN_T@
Expand Down
40 changes: 40 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,27 @@ AS_HELP_STRING([--with-openssl],[use OpenSSL library (default is YES)])
AS_HELP_STRING([], [ARG can be prefix for openssl library and headers]),\
GIT_PARSE_WITH(openssl))
#
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
#
# Define LIBPCREDIR=/foo/bar if your libpcre header and library files are in
# /foo/bar/include and /foo/bar/lib directories.
#
AC_ARG_WITH(libpcre,
AS_HELP_STRING([--with-libpcre],[support Perl-compatible regexes (default is NO)])
AS_HELP_STRING([], [ARG can be also prefix for libpcre library and headers]),
if test "$withval" = "no"; then \
USE_LIBPCRE=; \
elif test "$withval" = "yes"; then \
USE_LIBPCRE=YesPlease; \
else
USE_LIBPCRE=YesPlease; \
LIBPCREDIR=$withval; \
AC_MSG_NOTICE([Setting LIBPCREDIR to $withval]); \
GIT_CONF_APPEND_LINE(LIBPCREDIR=$withval); \
fi \
)
#
# Define NO_CURL if you do not have curl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
# transports.
Expand Down Expand Up @@ -434,6 +455,25 @@ GIT_UNSTASH_FLAGS($OPENSSLDIR)
AC_SUBST(NEEDS_SSL_WITH_CRYPTO)
AC_SUBST(NO_OPENSSL)

#
# Define USE_LIBPCRE if you have and want to use libpcre. git-grep will be
# able to use Perl-compatible regular expressions.
#

if test -n "$USE_LIBPCRE"; then

GIT_STASH_FLAGS($LIBPCREDIR)

AC_CHECK_LIB([pcre], [pcre_version],
[USE_LIBPCRE=YesPlease],
[USE_LIBPCRE=])

GIT_UNSTASH_FLAGS($LIBPCREDIR)

AC_SUBST(USE_LIBPCRE)

fi

#
# Define NO_CURL if you do not have libcurl installed. git-http-pull and
# git-http-push are not built, and you cannot use http:// and https://
Expand Down
3 changes: 2 additions & 1 deletion contrib/completion/git-completion.bash
Original file line number Diff line number Diff line change
Expand Up @@ -1441,8 +1441,9 @@ _git_grep ()
__gitcomp "
--cached
--text --ignore-case --word-regexp --invert-match
--full-name
--full-name --line-number
--extended-regexp --basic-regexp --fixed-strings
--perl-regexp
--files-with-matches --name-only
--files-without-match
--max-depth
Expand Down
125 changes: 106 additions & 19 deletions grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,84 @@ struct grep_opt *grep_opt_dup(const struct grep_opt *opt)
return ret;
}

/*
 * Report that pattern `p` could not be compiled and bail out through die().
 * This function is declared NORETURN.
 *
 * The message starts with a description of where the pattern came from,
 * when that is known:
 *   - "In '<origin>' at <no>, "  when p->no is non-zero,
 *   - "<origin>, "               when only p->origin is set,
 *   - nothing                    otherwise.
 * It is followed by the quoted pattern text and `error`, the description
 * of the failure supplied by the caller.
 */
static NORETURN void compile_regexp_failed(const struct grep_pat *p,
		const char *error)
{
	char where[1024];

	/*
	 * Nothing visible here bounds the length of p->origin, so format
	 * with snprintf(): an overlong origin gets a truncated prefix
	 * instead of writing past the end of where[].
	 */
	if (p->no)
		snprintf(where, sizeof(where), "In '%s' at %d, ",
			 p->origin, p->no);
	else if (p->origin)
		snprintf(where, sizeof(where), "%s, ", p->origin);
	else
		where[0] = 0;

	die("%s'%s': %s", where, p->pattern, error);
}

#ifdef USE_LIBPCRE
/*
 * Compile p->pattern with libpcre.
 *
 * On success the compiled pattern is stored in p->pcre_regexp and the
 * result of pcre_study() in p->pcre_extra_info.  PCRE_CASELESS is
 * requested when opt->ignore_case is set.
 *
 * A compilation failure is reported through compile_regexp_failed();
 * a pcre_study() failure that sets an error message is reported
 * through die().
 */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
	const char *errmsg;
	int erroff;
	int pcre_flags = opt->ignore_case ? PCRE_CASELESS : 0;

	p->pcre_regexp = pcre_compile(p->pattern, pcre_flags,
				      &errmsg, &erroff, NULL);
	if (!p->pcre_regexp)
		compile_regexp_failed(p, errmsg);

	/* A NULL study result is only an error when a message was set. */
	p->pcre_extra_info = pcre_study(p->pcre_regexp, 0, &errmsg);
	if (errmsg && !p->pcre_extra_info)
		die("%s", errmsg);
}

/*
 * Match the compiled PCRE pattern of `p` against the bytes [line, eol).
 *
 * REG_NOTBOL in `eflags` is translated to PCRE_NOTBOL.
 *
 * Returns 0 when the pattern matched, in which case match->rm_so and
 * match->rm_eo hold the start and end offsets of the whole match
 * relative to `line`.  Returns PCRE_ERROR_NOMATCH (non-zero) when there
 * is no match.  Any other pcre_exec() error is reported through die().
 */
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
		regmatch_t *match, int eflags)
{
	int ovector[30], ret, flags = 0;

	if (eflags & REG_NOTBOL)
		flags |= PCRE_NOTBOL;

	ret = pcre_exec(p->pcre_regexp, p->pcre_extra_info, line, eol - line,
			0, flags, ovector, ARRAY_SIZE(ovector));
	if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
		die("pcre_exec failed with error code %d", ret);
	if (ret >= 0) {
		/*
		 * pcre_exec() returns 0 (not a positive count) when the
		 * pattern matched but ovector was too small to hold every
		 * captured substring.  The first pair, describing the whole
		 * match, is still filled in, so treat 0 as a match too
		 * rather than reporting a hit with *match left unset.
		 */
		ret = 0;
		match->rm_so = ovector[0];
		match->rm_eo = ovector[1];
	}

	return ret;
}

/*
 * Release the libpcre resources attached to `p`: the compiled pattern
 * (p->pcre_regexp) and the study data (p->pcre_extra_info), i.e. the two
 * fields filled in by compile_pcre_regexp().
 */
static void free_pcre_regexp(struct grep_pat *p)
{
pcre_free(p->pcre_regexp);
pcre_free(p->pcre_extra_info);
}
#else /* !USE_LIBPCRE */
/*
 * Stand-in used when USE_LIBPCRE is not defined: Perl-compatible patterns
 * cannot be compiled in such a build, so any attempt is reported through
 * die().  Both parameters are unused.
 */
static void compile_pcre_regexp(struct grep_pat *p, const struct grep_opt *opt)
{
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}

/*
 * Stand-in used when USE_LIBPCRE is not defined.  Always returns 1, which
 * patmatch() negates into "no hit"; `match` is left untouched and all
 * parameters are unused.
 */
static int pcrematch(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
{
return 1;
}

/*
 * Stand-in used when USE_LIBPCRE is not defined: there is nothing to
 * release, so this is intentionally a no-op.
 */
static void free_pcre_regexp(struct grep_pat *p)
{
}
#endif /* !USE_LIBPCRE */

static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
int err;
Expand All @@ -70,20 +148,17 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
if (p->fixed)
return;

if (opt->pcre) {
compile_pcre_regexp(p, opt);
return;
}

err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
char where[1024];
if (p->no)
sprintf(where, "In '%s' at %d, ",
p->origin, p->no);
else if (p->origin)
sprintf(where, "%s, ", p->origin);
else
where[0] = 0;
regerror(err, &p->regexp, errbuf, 1024);
regfree(&p->regexp);
die("%s'%s': %s", where, p->pattern, errbuf);
compile_regexp_failed(p, errbuf);
}
}

Expand Down Expand Up @@ -320,7 +395,10 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
regfree(&p->regexp);
if (p->pcre_regexp)
free_pcre_regexp(p);
else
regfree(&p->regexp);
break;
default:
break;
Expand Down Expand Up @@ -412,6 +490,21 @@ static int regmatch(const regex_t *preg, char *line, char *eol,
return regexec(preg, line, 1, match, eflags);
}

/*
 * Match pattern `p` against the bytes [line, eol) using whichever matcher
 * the pattern was set up for:
 *   - fixmatch()  when p->fixed is set,
 *   - pcrematch() when p->pcre_regexp is set,
 *   - regmatch()  on p->regexp otherwise.
 *
 * Each matcher returns 0 on a match, so the result is negated here:
 * the return value is non-zero for a hit and 0 for a miss.  `match` and
 * `eflags` are passed through to the selected matcher (fixmatch() takes
 * no eflags).
 */
static int patmatch(struct grep_pat *p, char *line, char *eol,
		regmatch_t *match, int eflags)
{
	if (p->fixed)
		return !fixmatch(p, line, eol, match);

	if (p->pcre_regexp)
		return !pcrematch(p, line, eol, match, eflags);

	return !regmatch(&p->regexp, line, eol, match, eflags);
}

static int strip_timestamp(char *bol, char **eol_p)
{
char *eol = *eol_p;
Expand Down Expand Up @@ -461,10 +554,7 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
}

again:
if (p->fixed)
hit = !fixmatch(p, bol, eol, pmatch);
else
hit = !regmatch(&p->regexp, bol, eol, pmatch, eflags);
hit = patmatch(p, bol, eol, pmatch, eflags);

if (hit && p->word_regexp) {
if ((pmatch[0].rm_so < 0) ||
Expand Down Expand Up @@ -791,10 +881,7 @@ static int look_ahead(struct grep_opt *opt,
int hit;
regmatch_t m;

if (p->fixed)
hit = !fixmatch(p, bol, bol + *left_p, &m);
else
hit = !regmatch(&p->regexp, bol, bol + *left_p, &m, 0);
hit = patmatch(p, bol, bol + *left_p, &m, 0);
if (!hit || m.rm_so < 0 || m.rm_eo < 0)
continue;
if (earliest < 0 || m.rm_so < earliest)
Expand Down Expand Up @@ -891,7 +978,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name,
int hit;

/*
* look_ahead() skips quicly to the line that possibly
* look_ahead() skips quickly to the line that possibly
* has the next hit; don't call it if we need to do
* something more than just skipping the current line
* in response to an unmatch for the current line. E.g.
Expand Down
Loading

0 comments on commit be653d6

Please sign in to comment.