From eb41775ecc031cd8e38aebbd26826d74922a0db9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Jun 2012 11:28:00 -0700 Subject: [PATCH 1/6] ls-files -i: pay attention to exclusion of leading paths "git ls-files --exclude=t/ -i" does not show paths in directory t/ that have been added to the index, but it should. The excluded() API was designed for callers that walk the tree from the top, checking each level of the directory hierarchy as they descend to see if it is excluded, and not even bothering to recurse into an excluded directory. This allows us to optimize for a common case by not having to check if the exclude pattern "foo/" matches when looking at "foo/bar", because the caller should have noticed that "foo" is excluded and would not even have bothered to read "foo/bar" out of opendir()/readdir() to call excluded() on it. The code for "ls-files -i", however, walks the index linearly, feeding paths without checking if the leading directory is already excluded. Introduce a helper function path_excluded() to let this caller properly run the excluded() check on the leading directories as necessary. 
Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 17 +++++++++++------ dir.c | 32 ++++++++++++++++++++++++++++++++ dir.h | 16 ++++++++++++++++ 3 files changed, 59 insertions(+), 6 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 7cff17574..90dc3601a 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -203,6 +203,10 @@ static void show_ru_info(void) static void show_files(struct dir_struct *dir) { int i; + struct path_exclude_check check; + + if ((dir->flags & DIR_SHOW_IGNORED)) + path_exclude_check_init(&check, dir); /* For cached/deleted files we don't need to even do the readdir */ if (show_others || show_killed) { @@ -215,9 +219,8 @@ static void show_files(struct dir_struct *dir) if (show_cached | show_stage) { for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; - int dtype = ce_to_dtype(ce); - if (dir->flags & DIR_SHOW_IGNORED && - !excluded(dir, ce->name, &dtype)) + if ((dir->flags & DIR_SHOW_IGNORED) && + !path_excluded(&check, ce)) continue; if (show_unmerged && !ce_stage(ce)) continue; @@ -232,9 +235,8 @@ static void show_files(struct dir_struct *dir) struct cache_entry *ce = active_cache[i]; struct stat st; int err; - int dtype = ce_to_dtype(ce); - if (dir->flags & DIR_SHOW_IGNORED && - !excluded(dir, ce->name, &dtype)) + if ((dir->flags & DIR_SHOW_IGNORED) && + !path_excluded(&check, ce)) continue; if (ce->ce_flags & CE_UPDATE) continue; @@ -247,6 +249,9 @@ static void show_files(struct dir_struct *dir) show_ce_entry(tag_modified, ce); } } + + if ((dir->flags & DIR_SHOW_IGNORED)) + path_exclude_check_clear(&check); } /* diff --git a/dir.c b/dir.c index 0a78d00b5..c3f08849b 100644 --- a/dir.c +++ b/dir.c @@ -580,6 +580,38 @@ int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) return 0; } +void path_exclude_check_init(struct path_exclude_check *check, + struct dir_struct *dir) +{ + check->dir = dir; + strbuf_init(&check->path, 256); +} + +void path_exclude_check_clear(struct 
path_exclude_check *check) +{ + strbuf_release(&check->path); +} + +int path_excluded(struct path_exclude_check *check, struct cache_entry *ce) +{ + int i, dtype; + struct strbuf *path = &check->path; + + strbuf_setlen(path, 0); + for (i = 0; ce->name[i]; i++) { + int ch = ce->name[i]; + + if (ch == '/') { + dtype = DT_DIR; + if (excluded(check->dir, path->buf, &dtype)) + return 1; + } + strbuf_addch(path, ch); + } + dtype = ce_to_dtype(ce); + return excluded(check->dir, ce->name, &dtype); +} + static struct dir_entry *dir_entry_new(const char *pathname, int len) { struct dir_entry *ent; diff --git a/dir.h b/dir.h index dd6947e1d..7378e69c1 100644 --- a/dir.h +++ b/dir.h @@ -1,6 +1,8 @@ #ifndef DIR_H #define DIR_H +#include "strbuf.h" + struct dir_entry { unsigned int len; char name[FLEX_ARRAY]; /* more */ @@ -78,6 +80,20 @@ extern int excluded_from_list(const char *pathname, int pathlen, const char *bas int *dtype, struct exclude_list *el); extern int excluded(struct dir_struct *, const char *, int *); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); + +/* + * The excluded() API is meant for callers that check each level of leading + * directory hierarchies with excluded() to avoid recursing into excluded + * directories. Callers that do not do so should use this API instead. 
+ */ +struct path_exclude_check { + struct dir_struct *dir; + struct strbuf path; +}; +extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); +extern void path_exclude_check_clear(struct path_exclude_check *); +extern int path_excluded(struct path_exclude_check *, struct cache_entry *); + extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, char **buf_p, struct exclude_list *which, int check_index); extern void add_excludes_from_file(struct dir_struct *, const char *fname); From 93921b07e985de38f7af6689d81e5c7dfb3f8aa0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 1 Jun 2012 12:22:09 -0700 Subject: [PATCH 2/6] ls-files -i: micro-optimize path_excluded() As we know a caller that does not recurse is calling us in the index order, we can remember the last directory we found to be excluded and see if the path we are looking at is still inside it, in which case we can just answer that it is excluded. Signed-off-by: Junio C Hamano --- dir.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/dir.c b/dir.c index c3f08849b..839bc9f61 100644 --- a/dir.c +++ b/dir.c @@ -592,11 +592,25 @@ void path_exclude_check_clear(struct path_exclude_check *check) strbuf_release(&check->path); } +/* + * Is the ce->name excluded? This is for a caller like show_files() that + * do not honor directory hierarchy and iterate through paths that are + * possibly in an ignored directory. + * + * A path to a directory known to be excluded is left in check->path to + * optimize for repeated checks for files in the same excluded directory. 
+ */ int path_excluded(struct path_exclude_check *check, struct cache_entry *ce) { int i, dtype; struct strbuf *path = &check->path; + if (path->len && + path->len <= ce_namelen(ce) && + !memcmp(ce->name, path->buf, path->len) && + (!ce->name[path->len] || ce->name[path->len] == '/')) + return 1; + strbuf_setlen(path, 0); for (i = 0; ce->name[i]; i++) { int ch = ce->name[i]; @@ -608,6 +622,10 @@ int path_excluded(struct path_exclude_check *check, struct cache_entry *ce) } strbuf_addch(path, ch); } + + /* An entry in the index; cannot be a directory with subentries */ + strbuf_setlen(path, 0); + dtype = ce_to_dtype(ce); return excluded(check->dir, ce->name, &dtype); } From 782cd4c0f6e0fef5147cb738009dde6e778f4932 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 5 Jun 2012 21:17:52 -0700 Subject: [PATCH 3/6] path_excluded(): update API to be less cache-entry centric It was stupid of me to make the API too cache-entry specific; the caller may want to check an arbitrary pathname without having a corresponding cache-entry to see if a path is ignored. 
Signed-off-by: Junio C Hamano --- builtin/ls-files.c | 10 ++++++++-- dir.c | 32 ++++++++++++++++++++------------ dir.h | 3 ++- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 90dc3601a..31b3f2d90 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -200,6 +200,12 @@ static void show_ru_info(void) } } +static int ce_excluded(struct path_exclude_check *check, struct cache_entry *ce) +{ + int dtype = ce_to_dtype(ce); + return path_excluded(check, ce->name, ce_namelen(ce), &dtype); +} + static void show_files(struct dir_struct *dir) { int i; @@ -220,7 +226,7 @@ static void show_files(struct dir_struct *dir) for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; if ((dir->flags & DIR_SHOW_IGNORED) && - !path_excluded(&check, ce)) + !ce_excluded(&check, ce)) continue; if (show_unmerged && !ce_stage(ce)) continue; @@ -236,7 +242,7 @@ static void show_files(struct dir_struct *dir) struct stat st; int err; if ((dir->flags & DIR_SHOW_IGNORED) && - !path_excluded(&check, ce)) + !ce_excluded(&check, ce)) continue; if (ce->ce_flags & CE_UPDATE) continue; diff --git a/dir.c b/dir.c index 839bc9f61..7ab7fc5c2 100644 --- a/dir.c +++ b/dir.c @@ -593,31 +593,40 @@ void path_exclude_check_clear(struct path_exclude_check *check) } /* - * Is the ce->name excluded? This is for a caller like show_files() that + * Is this name excluded? This is for a caller like show_files() that * do not honor directory hierarchy and iterate through paths that are * possibly in an ignored directory. * * A path to a directory known to be excluded is left in check->path to * optimize for repeated checks for files in the same excluded directory. 
*/ -int path_excluded(struct path_exclude_check *check, struct cache_entry *ce) +int path_excluded(struct path_exclude_check *check, + const char *name, int namelen, int *dtype) { - int i, dtype; + int i; struct strbuf *path = &check->path; + /* + * we allow the caller to pass namelen as an optimization; it + * must match the length of the name, as we eventually call + * excluded() on the whole name string. + */ + if (namelen < 0) + namelen = strlen(name); + if (path->len && - path->len <= ce_namelen(ce) && - !memcmp(ce->name, path->buf, path->len) && - (!ce->name[path->len] || ce->name[path->len] == '/')) + path->len <= namelen && + !memcmp(name, path->buf, path->len) && + (!name[path->len] || name[path->len] == '/')) return 1; strbuf_setlen(path, 0); - for (i = 0; ce->name[i]; i++) { - int ch = ce->name[i]; + for (i = 0; name[i]; i++) { + int ch = name[i]; if (ch == '/') { - dtype = DT_DIR; - if (excluded(check->dir, path->buf, &dtype)) + int dt = DT_DIR; + if (excluded(check->dir, path->buf, &dt)) return 1; } strbuf_addch(path, ch); @@ -626,8 +635,7 @@ int path_excluded(struct path_exclude_check *check, struct cache_entry *ce) /* An entry in the index; cannot be a directory with subentries */ strbuf_setlen(path, 0); - dtype = ce_to_dtype(ce); - return excluded(check->dir, ce->name, &dtype); + return excluded(check->dir, name, dtype); } static struct dir_entry *dir_entry_new(const char *pathname, int len) diff --git a/dir.h b/dir.h index 7378e69c1..36a82b3bd 100644 --- a/dir.h +++ b/dir.h @@ -92,7 +92,8 @@ struct path_exclude_check { }; extern void path_exclude_check_init(struct path_exclude_check *, struct dir_struct *); extern void path_exclude_check_clear(struct path_exclude_check *); -extern int path_excluded(struct path_exclude_check *, struct cache_entry *); +extern int path_excluded(struct path_exclude_check *, const char *, int namelen, int *dtype); + extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, char **buf_p, 
struct exclude_list *which, int check_index); From eb69934bbd1200a318422c0d127c043d6b8457e1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 5 Jun 2012 21:44:22 -0700 Subject: [PATCH 4/6] builtin/add.c: use path_excluded() This only happens in the --ignore-missing --dry-run codepath, which presumably nobody should care about, but is done for completeness. Signed-off-by: Junio C Hamano --- builtin/add.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/builtin/add.c b/builtin/add.c index c59b0c98f..e5b40d987 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -441,6 +441,9 @@ int cmd_add(int argc, const char **argv, const char *prefix) if (pathspec) { int i; + struct path_exclude_check check; + + path_exclude_check_init(&check, &dir); if (!seen) seen = find_used_pathspec(pathspec); for (i = 0; pathspec[i]; i++) { @@ -448,7 +451,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) && !file_exists(pathspec[i])) { if (ignore_missing) { int dtype = DT_UNKNOWN; - if (excluded(&dir, pathspec[i], &dtype)) + if (path_excluded(&check, pathspec[i], -1, &dtype)) dir_add_ignored(&dir, pathspec[i], strlen(pathspec[i])); } else die(_("pathspec '%s' did not match any files"), @@ -456,6 +459,7 @@ int cmd_add(int argc, const char **argv, const char *prefix) } } free(seen); + path_exclude_check_clear(&check); } exit_status |= add_files_to_cache(prefix, pathspec, flags); From 589570dbe7d291e73defea367babb5400a87e9f0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 5 Jun 2012 22:21:42 -0700 Subject: [PATCH 5/6] unpack-trees.c: use path_excluded() in check_ok_to_remove() This function is responsible for determining if a path that is not tracked is ignored and allowing "checkout" to overwrite it as needed. It used excluded() without checking if a higher-level directory in the path is ignored; correct it to use path_excluded() for this check. 
Signed-off-by: Junio C Hamano --- * There are uses of the lower-level interface excluded_from_list() in the codepath for the narrow-checkout hack; they are supposed to be already checking each level as they descend, and are not touched with this patch. --- unpack-trees.c | 11 ++++++++++- unpack-trees.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/unpack-trees.c b/unpack-trees.c index 7c9ecf665..133f2c9d2 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1016,6 +1016,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->el = &el; } + if (o->dir) { + o->path_exclude_check = xmalloc(sizeof(struct path_exclude_check)); + path_exclude_check_init(o->path_exclude_check, o->dir); + } memset(&o->result, 0, sizeof(o->result)); o->result.initialized = 1; o->result.timestamp.sec = o->src_index->timestamp.sec; @@ -1140,6 +1144,10 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options done: free_excludes(&el); + if (o->path_exclude_check) { + path_exclude_check_clear(o->path_exclude_check); + free(o->path_exclude_check); + } return ret; return_failed: @@ -1355,7 +1363,8 @@ static int check_ok_to_remove(const char *name, int len, int dtype, if (ignore_case && icase_exists(o, name, len, st)) return 0; - if (o->dir && excluded(o->dir, name, &dtype)) + if (o->dir && + path_excluded(o->path_exclude_check, name, -1, &dtype)) /* * ce->name is explicitly excluded, so it is Ok to * overwrite it. 
diff --git a/unpack-trees.h b/unpack-trees.h index 5e432f576..ec74a9f19 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -52,6 +52,7 @@ struct unpack_trees_options { const char *prefix; int cache_bottom; struct dir_struct *dir; + struct path_exclude_check *path_exclude_check; struct pathspec *pathspec; merge_fn_t fn; const char *msgs[NB_UNPACK_TREES_ERROR_TYPES]; From 0d316f0ceff1c416c25327f40bc5fbdded98a01a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 5 Jun 2012 22:26:12 -0700 Subject: [PATCH 6/6] dir.c: make excluded() file scope static There are no longer any external callers of this interface, so we can make it static. Signed-off-by: Junio C Hamano --- dir.c | 2 +- dir.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 7ab7fc5c2..79e43377a 100644 --- a/dir.c +++ b/dir.c @@ -560,7 +560,7 @@ int excluded_from_list(const char *pathname, return -1; /* undecided */ } -int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) +static int excluded(struct dir_struct *dir, const char *pathname, int *dtype_p) { int pathlen = strlen(pathname); int st; diff --git a/dir.h b/dir.h index 36a82b3bd..1a88a7564 100644 --- a/dir.h +++ b/dir.h @@ -78,7 +78,6 @@ extern int read_directory(struct dir_struct *, const char *path, int len, const extern int excluded_from_list(const char *pathname, int pathlen, const char *basename, int *dtype, struct exclude_list *el); -extern int excluded(struct dir_struct *, const char *, int *); struct dir_entry *dir_add_ignored(struct dir_struct *dir, const char *pathname, int len); /*