From a60ea8fb66945a886ea53fd3f41e61cc5fb3201e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Feb 2016 16:03:36 +0700 Subject: [PATCH 1/4] dir.c: fix match_pathname() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given the pattern "1/2/3/4" and the path "1/2/3/4/f", the pattern prefix is "1/2/3/4". We will compare and remove the prefix from both pattern and path and come to this code /* * If the whole pattern did not have a wildcard, * then our prefix match is all we need; we * do not need to call fnmatch at all. */ if (!patternlen && !namelen) return 1; where patternlen is zero (full pattern consumed) and the remaining path in "name" is "/f". We fail to realize it's matched in this case and fall back to fnmatch(), which also fails to catch it. Fix it. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dir.c b/dir.c index f0b6d0a3e..bcaafac40 100644 --- a/dir.c +++ b/dir.c @@ -878,7 +878,7 @@ int match_pathname(const char *pathname, int pathlen, * then our prefix match is all we need; we * do not need to call fnmatch at all. */ - if (!patternlen && !namelen) + if (!patternlen && (!namelen || *name == '/')) return 1; } From bac65a2be598811d944c846bdf61580f5c1af927 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Feb 2016 16:03:37 +0700 Subject: [PATCH 2/4] dir.c: support tracing exclude MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/git-check-ignore.txt | 1 + Documentation/git.txt | 5 +++++ dir.c | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/Documentation/git-check-ignore.txt b/Documentation/git-check-ignore.txt index e94367a5e..f60ee051f 100644 --- a/Documentation/git-check-ignore.txt +++ b/Documentation/git-check-ignore.txt @@ -114,6 +114,7 @@ SEE ALSO linkgit:gitignore[5] linkgit:gitconfig[5] linkgit:git-ls-files[1] +GIT_TRACE_EXCLUDE in linkgit:git[1] GIT --- diff --git a/Documentation/git.txt b/Documentation/git.txt index d987ad20c..2c4f0f247 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -1064,6 +1064,11 @@ of clones and fetches. cloning of shallow repositories. See 'GIT_TRACE' for available trace output options. +'GIT_TRACE_EXCLUDE':: + Enables trace messages that can help debugging .gitignore + processing. See 'GIT_TRACE' for available trace output + options. + 'GIT_LITERAL_PATHSPECS':: Setting this variable to `1` will cause Git to treat all pathspecs literally, rather than as glob patterns. For example, diff --git a/dir.c b/dir.c index bcaafac40..0be7cf192 100644 --- a/dir.c +++ b/dir.c @@ -53,6 +53,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, int check_only, const struct path_simplify *simplify); static int get_dtype(struct dirent *de, const char *path, int len); +static struct trace_key trace_exclude = TRACE_KEY_INIT(EXCLUDE); + /* helper string functions with support for the ignore_case flag */ int strcmp_icase(const char *a, const char *b) { @@ -905,6 +907,8 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, if (!el->nr) return NULL; /* undefined */ + trace_printf_key(&trace_exclude, "exclude: from %s\n", el->src); + for (i = el->nr - 1; 0 <= i; i--) { struct exclude *x = el->excludes[i]; const char *exclude = x->pattern; @@ -936,6 +940,16 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, break; } } + + if (!exc) { + trace_printf_key(&trace_exclude, "exclude: %.*s => n/a\n", + pathlen, pathname); + return NULL; + } + + trace_printf_key(&trace_exclude, "exclude: %.*s vs %s at line %d => %s\n", + pathlen, pathname, exc->pattern, exc->srcpos, + exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes"); return exc; } @@ -1683,9 +1697,13 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, struct cached_dir cdir; enum path_treatment state, subdir_state, dir_state = path_none; struct strbuf path = STRBUF_INIT; + static int level = 0; strbuf_add(&path, base, baselen); + trace_printf_key(&trace_exclude, "exclude: [%d] enter '%.*s'\n", + level++, baselen, base); + if (open_cached_dir(&cdir, dir, untracked, &path, check_only)) goto out; @@ -1749,6 +1767,8 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, } close_cached_dir(&cdir); out: + trace_printf_key(&trace_exclude, "exclude: [%d] leave '%.*s'\n", + --level, baselen, base); strbuf_release(&path); return dir_state; From c62a91736a6dcd909167cf9fbb751d65c85482dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Feb 2016 16:03:38 +0700 Subject: [PATCH 3/4] dir.c: support marking some patterns already matched MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given path "a" and the pattern "a", it's matched. But if we throw path "a/b" to pattern "a", the code fails to realize that if "a" matches "a" then "a/b" should also be matched. When the pattern is matched the first time, we can mark it "sticky", so that all files and dirs inside the matched path also matches. This is a simpler solution than modify all match scenarios to fix that. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- dir.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- dir.h | 3 +++ 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/dir.c b/dir.c index 0be7cf192..8a9d8c061 100644 --- a/dir.c +++ b/dir.c @@ -521,6 +521,7 @@ void add_exclude(const char *string, const char *base, x->baselen = baselen; x->flags = flags; x->srcpos = srcpos; + string_list_init(&x->sticky_paths, 1); ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); el->excludes[el->nr++] = x; x->el = el; @@ -561,8 +562,10 @@ void clear_exclude_list(struct exclude_list *el) { int i; - for (i = 0; i < el->nr; i++) + for (i = 0; i < el->nr; i++) { + string_list_clear(&el->excludes[i]->sticky_paths, 0); free(el->excludes[i]); + } free(el->excludes); free(el->filebuf); @@ -889,6 +892,44 @@ int match_pathname(const char *pathname, int pathlen, WM_PATHNAME) == 0; } +static void add_sticky(struct exclude *exc, const char *pathname, int pathlen) +{ + struct strbuf sb = STRBUF_INIT; + int i; + + for (i = exc->sticky_paths.nr - 1; i >= 0; i--) { + const char *sticky = exc->sticky_paths.items[i].string; + int len = strlen(sticky); + + if (pathlen < len && sticky[pathlen] == '/' && + !strncmp(pathname, sticky, pathlen)) + return; + } + + strbuf_add(&sb, pathname, pathlen); + string_list_append_nodup(&exc->sticky_paths, strbuf_detach(&sb, NULL)); +} + +static int match_sticky(struct exclude *exc, const char *pathname, int pathlen, int dtype) +{ + int i; + + for (i = exc->sticky_paths.nr - 1; i >= 0; i--) { + const char *sticky = exc->sticky_paths.items[i].string; + int len = strlen(sticky); + + if (pathlen == len && dtype == DT_DIR && + !strncmp(pathname, sticky, len)) + return 1; + + if (pathlen > len && pathname[len] == '/' && + !strncmp(pathname, sticky, len)) + return 1; + } + + return 0; +} + /* * Scan the given exclude list in reverse to see whether pathname * should be ignored. The first match (i.e. the last on the list), if @@ -914,6 +955,16 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, const char *exclude = x->pattern; int prefix = x->nowildcardlen; + if (x->sticky_paths.nr) { + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, pathname, pathlen); + if (match_sticky(x, pathname, pathlen, *dtype)) { + exc = x; + break; + } + continue; + } + if (x->flags & EXC_FLAG_MUSTBEDIR) { if (*dtype == DT_UNKNOWN) *dtype = get_dtype(NULL, pathname, pathlen); @@ -947,9 +998,10 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, return NULL; } - trace_printf_key(&trace_exclude, "exclude: %.*s vs %s at line %d => %s\n", + trace_printf_key(&trace_exclude, "exclude: %.*s vs %s at line %d => %s%s\n", pathlen, pathname, exc->pattern, exc->srcpos, - exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes"); + exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes", + exc->sticky_paths.nr ? " (stuck)" : ""); return exc; } @@ -2005,6 +2057,25 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d return root; } +static void clear_sticky(struct dir_struct *dir) +{ + struct exclude_list_group *g; + struct exclude_list *el; + struct exclude *x; + int i, j, k; + + for (i = EXC_CMDL; i <= EXC_FILE; i++) { + g = &dir->exclude_list_group[i]; + for (j = g->nr - 1; j >= 0; j--) { + el = &g->el[j]; + for (k = el->nr - 1; 0 <= k; k--) { + x = el->excludes[k]; + string_list_clear(&x->sticky_paths, 0); + } + } + } +} + int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec) { struct path_simplify *simplify; diff --git a/dir.h b/dir.h index cd46f3001..3ec3fb0dc 100644 --- a/dir.h +++ b/dir.h @@ -4,6 +4,7 @@ /* See Documentation/technical/api-directory-listing.txt */ #include "strbuf.h" +#include "string-list.h" struct dir_entry { unsigned int len; @@ -34,6 +35,8 @@ struct exclude { * and from -1 decrementing for patterns from CLI args. */ int srcpos; + + struct string_list sticky_paths; }; /* From d589a67eceacd1cc171bbe94906ca7c9a0edd8c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Mon, 15 Feb 2016 16:03:39 +0700 Subject: [PATCH 4/4] dir.c: don't exclude whole dir prematurely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If there is a pattern "!foo/bar", this patch makes it not exclude "foo" right away. This gives us a chance to examine "foo" and re-include "foo/bar". Helped-by: brian m. carlson Helped-by: Micha Wiedenmann Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- Documentation/gitignore.txt | 17 +++- dir.c | 109 +++++++++++++++++++- t/t3001-ls-files-others-exclude.sh | 7 +- t/t3007-ls-files-other-negative.sh | 153 +++++++++++++++++++++++++++++ 4 files changed, 276 insertions(+), 10 deletions(-) create mode 100755 t/t3007-ls-files-other-negative.sh diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt index 473623d63..3ded6fdc9 100644 --- a/Documentation/gitignore.txt +++ b/Documentation/gitignore.txt @@ -82,12 +82,12 @@ PATTERN FORMAT - An optional prefix "`!`" which negates the pattern; any matching file excluded by a previous pattern will become - included again. It is not possible to re-include a file if a parent - directory of that file is excluded. Git doesn't list excluded - directories for performance reasons, so any patterns on contained - files have no effect, no matter where they are defined. + included again. Put a backslash ("`\`") in front of the first "`!`" for patterns that begin with a literal "`!`", for example, "`\!important!.txt`". + It is possible to re-include a file if a parent directory of that + file is excluded if certain conditions are met. See section NOTES + for detail. - If the pattern ends with a slash, it is removed for the purpose of the following description, but it would only find @@ -141,6 +141,15 @@ not tracked by Git remain untracked. To stop tracking a file that is currently tracked, use 'git rm --cached'. +To re-include files or directories when their parent directory is +excluded, the following conditions must be met: + + - The rules to exclude a directory and re-include a subset back must + be in the same .gitignore file. + + - The directory part in the re-include rules must be literal (i.e. no + wildcards) + EXAMPLES -------- diff --git a/dir.c b/dir.c index 8a9d8c061..552af2370 100644 --- a/dir.c +++ b/dir.c @@ -930,6 +930,75 @@ static int match_sticky(struct exclude *exc, const char *pathname, int pathlen, return 0; } +static inline int different_decisions(const struct exclude *a, + const struct exclude *b) +{ + return (a->flags & EXC_FLAG_NEGATIVE) != (b->flags & EXC_FLAG_NEGATIVE); +} + +/* + * Return non-zero if pathname is a directory and an ancestor of the + * literal path in a pattern. + */ +static int match_directory_part(const char *pathname, int pathlen, + int *dtype, struct exclude *x) +{ + const char *base = x->base; + int baselen = x->baselen ? x->baselen - 1 : 0; + const char *pattern = x->pattern; + int prefix = x->nowildcardlen; + int patternlen = x->patternlen; + + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, pathname, pathlen); + if (*dtype != DT_DIR) + return 0; + + if (*pattern == '/') { + pattern++; + patternlen--; + prefix--; + } + + if (baselen) { + if (((pathlen < baselen && base[pathlen] == '/') || + pathlen == baselen) && + !strncmp_icase(pathname, base, pathlen)) + return 1; + pathname += baselen + 1; + pathlen -= baselen + 1; + } + + + if (prefix && + (((pathlen < prefix && pattern[pathlen] == '/') || + pathlen == prefix) && + !strncmp_icase(pathname, pattern, pathlen))) + return 1; + + return 0; +} + +static struct exclude *should_descend(const char *pathname, int pathlen, + int *dtype, struct exclude_list *el, + struct exclude *exc) +{ + int i; + + for (i = el->nr - 1; 0 <= i; i--) { + struct exclude *x = el->excludes[i]; + + if (x == exc) + break; + + if (!(x->flags & EXC_FLAG_NODIR) && + different_decisions(x, exc) && + match_directory_part(pathname, pathlen, dtype, x)) + return x; + } + return NULL; +} + /* * Scan the given exclude list in reverse to see whether pathname * should be ignored. The first match (i.e. the last on the list), if @@ -943,7 +1012,7 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, struct exclude_list *el) { struct exclude *exc = NULL; /* undecided */ - int i; + int i, maybe_descend = 0; if (!el->nr) return NULL; /* undefined */ @@ -955,6 +1024,10 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, const char *exclude = x->pattern; int prefix = x->nowildcardlen; + if (!maybe_descend && i < el->nr - 1 && + different_decisions(x, el->excludes[i+1])) + maybe_descend = 1; + if (x->sticky_paths.nr) { if (*dtype == DT_UNKNOWN) *dtype = get_dtype(NULL, pathname, pathlen); @@ -998,6 +1071,34 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, return NULL; } + /* + * We have found a matching pattern "exc" that may exclude whole + * directory. We also found that there may be a pattern that matches + * something inside the directory and reincludes stuff. + * + * Go through the patterns again, find that pattern and double check. + * If it's true, return "undecided" and keep descending in. "exc" is + * marked sticky so that it continues to match inside the directory. + */ + if (!(exc->flags & EXC_FLAG_NEGATIVE) && maybe_descend) { + struct exclude *x; + + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, pathname, pathlen); + + if (*dtype == DT_DIR && + (x = should_descend(pathname, pathlen, dtype, el, exc))) { + add_sticky(exc, pathname, pathlen); + trace_printf_key(&trace_exclude, + "exclude: %.*s vs %s at line %d => %s," + " forced open by %s at line %d => n/a\n", + pathlen, pathname, exc->pattern, exc->srcpos, + exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes", + x->pattern, x->srcpos); + return NULL; + } + } + trace_printf_key(&trace_exclude, "exclude: %.*s vs %s at line %d => %s%s\n", pathlen, pathname, exc->pattern, exc->srcpos, exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes", @@ -2096,6 +2197,12 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru if (has_symlink_leading_path(path, len)) return dir->nr; + /* + * Stay on the safe side. if read_directory() has run once on + * "dir", some sticky flag may have been left. Clear them all. + */ + clear_sticky(dir); + /* * exclude patterns are treated like positive ones in * create_simplify. Usually exclude patterns should be a diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh index 3fc484e8c..d043078da 100755 --- a/t/t3001-ls-files-others-exclude.sh +++ b/t/t3001-ls-files-others-exclude.sh @@ -175,13 +175,10 @@ test_expect_success 'negated exclude matches can override previous ones' ' grep "^a.1" output ' -test_expect_success 'excluded directory overrides content patterns' ' +test_expect_success 'excluded directory does not override content patterns' ' git ls-files --others --exclude="one" --exclude="!one/a.1" >output && - if grep "^one/a.1" output - then - false - fi + grep "^one/a.1" output ' test_expect_success 'negated directory doesn'\''t affect content patterns' ' diff --git a/t/t3007-ls-files-other-negative.sh b/t/t3007-ls-files-other-negative.sh new file mode 100755 index 000000000..0797b86ad --- /dev/null +++ b/t/t3007-ls-files-other-negative.sh @@ -0,0 +1,153 @@ +#!/bin/sh + +test_description='test re-include patterns' + +. ./test-lib.sh + +test_expect_success 'setup' ' + mkdir -p fooo foo/bar tmp && + touch abc foo/def foo/bar/ghi foo/bar/bar +' + +test_expect_success 'no match, do not enter subdir and waste cycles' ' + cat >.gitignore <<-\EOF && + /tmp + /foo + !fooo/bar/bar + EOF + GIT_TRACE_EXCLUDE="$(pwd)/tmp/trace" git ls-files -o --exclude-standard >tmp/actual && + ! grep "enter .foo/.\$" tmp/trace && + cat >tmp/expected <<-\EOF && + .gitignore + abc + EOF + test_cmp tmp/expected tmp/actual +' + +test_expect_success 'match, excluded by literal pathname pattern' ' + cat >.gitignore <<-\EOF && + /tmp + /fooo + /foo + !foo/bar/bar + EOF + cat >fooo/.gitignore <<-\EOF && + !/* + EOF git ls-files -o --exclude-standard >tmp/actual && + cat >tmp/expected <<-\EOF && + .gitignore + abc + foo/bar/bar + EOF + test_cmp tmp/expected tmp/actual +' + +test_expect_success 'match, excluded by wildcard pathname pattern' ' + cat >.gitignore <<-\EOF && + /tmp + /fooo + /fo? + !foo/bar/bar + EOF + git ls-files -o --exclude-standard >tmp/actual && + cat >tmp/expected <<-\EOF && + .gitignore + abc + foo/bar/bar + EOF + test_cmp tmp/expected tmp/actual +' + +test_expect_success 'match, excluded by literal basename pattern' ' + cat >.gitignore <<-\EOF && + /tmp + /fooo + foo + !foo/bar/bar + EOF + git ls-files -o --exclude-standard >tmp/actual && + cat >tmp/expected <<-\EOF && + .gitignore + abc + foo/bar/bar + EOF + test_cmp tmp/expected tmp/actual +' + +test_expect_success 'match, excluded by wildcard basename pattern' ' + cat >.gitignore <<-\EOF && + /tmp + /fooo + fo? + !foo/bar/bar + EOF + git ls-files -o --exclude-standard >tmp/actual && + cat >tmp/expected <<-\EOF && + .gitignore + abc + foo/bar/bar + EOF + test_cmp tmp/expected tmp/actual +' + +test_expect_success 'match, excluded by literal mustbedir, basename pattern' ' + cat >.gitignore <<-\EOF && + /tmp + /fooo + foo/ + !foo/bar/bar + EOF + git ls-files -o --exclude-standard >tmp/actual && + cat >tmp/expected <<-\EOF && + .gitignore + abc + foo/bar/bar + EOF + test_cmp tmp/expected tmp/actual +' + +test_expect_success 'match, excluded by literal mustbedir, pathname pattern' ' + cat >.gitignore <<-\EOF && + /tmp + /fooo + /foo/ + !foo/bar/bar + EOF + git ls-files -o --exclude-standard >tmp/actual && + cat >tmp/expected <<-\EOF && + .gitignore + abc + foo/bar/bar + EOF + test_cmp tmp/expected tmp/actual +' + +test_expect_success 'prepare for nested negatives' ' + cat >.git/info/exclude <<-\EOF && + /.gitignore + /tmp + /foo + /abc + EOF + git ls-files -o --exclude-standard >tmp/actual && + test_must_be_empty tmp/actual && + mkdir -p 1/2/3/4 && + touch 1/f 1/2/f 1/2/3/f 1/2/3/4/f +' + +test_expect_success 'match, literal pathname, nested negatives' ' + cat >.gitignore <<-\EOF && + /1 + !1/2 + 1/2/3 + !1/2/3/4 + EOF + git ls-files -o --exclude-standard >tmp/actual && + cat >tmp/expected <<-\EOF && + 1/2/3/4/f + 1/2/f + EOF + test_cmp tmp/expected tmp/actual +' + +test_done