Skip to content

Commit

Permalink
Optimize directory listing with pathspec limiter.
Browse files Browse the repository at this point in the history
The way things are set up, you can now pass a "pathspec" to the
"read_directory()" function. If you pass NULL, it acts exactly
like it used to (read everything). If you pass a non-NULL
pointer, it will reduce each pathspec entry to its leading prefix
that contains no special characters, and skip any readdir() entry
early if its path doesn't match any of those prefixes.

NOTE! This does *not* obviate the need for the caller to do the *exact*
pathspec match later. It's a first-level filter on "read_directory()", but
it does not do the full pathspec thing. Maybe it should. But in the
meantime, builtin-add.c really does still need to do both steps:

	read_directory(dir, .., pathspec);
	if (pathspec)
		prune_directory(dir, pathspec, baselen);

i.e. the "prune_directory()" part will do the *exact* pathspec pruning,
while the "read_directory()" will use the pathspec just to do some quick
high-level pruning of the directories it will recurse into.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
  • Loading branch information
Linus Torvalds authored and Junio C Hamano committed Apr 1, 2007
1 parent d8b6a1a commit 9fc42d6
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 9 deletions.
2 changes: 1 addition & 1 deletion builtin-add.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ static void fill_directory(struct dir_struct *dir, const char **pathspec)
}

/* Read the directory and prune it */
read_directory(dir, path, base, baselen);
read_directory(dir, path, base, baselen, pathspec);
if (pathspec)
prune_directory(dir, pathspec, baselen);
}
Expand Down
2 changes: 1 addition & 1 deletion builtin-ls-files.c
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ static void show_files(struct dir_struct *dir, const char *prefix)

if (baselen)
path = base = prefix;
read_directory(dir, path, base, baselen);
read_directory(dir, path, base, baselen, pathspec);
if (show_others)
show_other_files(dir);
if (show_killed)
Expand Down
96 changes: 91 additions & 5 deletions dir.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
#include "cache.h"
#include "dir.h"

/*
 * One entry of the simplified pathspec table built by create_simplify()
 * and consulted by simplify_away().  The table is terminated by an
 * entry whose path is NULL.
 */
struct path_simplify {
/* length of the wildcard-free leading part of path (see simple_length()) */
int len;
/* the pathspec string this entry was built from; NULL marks the end */
const char *path;
};

int common_prefix(const char **pathspec)
{
const char *path, *slash, *next;
Expand Down Expand Up @@ -292,6 +297,31 @@ static int dir_exists(const char *dirname, int len)
return !strncmp(active_cache[pos]->name, dirname, len);
}

/*
 * Cheap, inexact pre-filter used while reading directories
 * recursively.
 *
 * Returns 1 when "path" (of length "pathlen") cannot match any entry
 * of the NULL-terminated "simplify" table, so the caller may skip it.
 * Returns 0 when the path might match, or when no table was given.
 *
 * Only the first min(entry length, pathlen) bytes are compared, so a
 * path that is itself a leading part of a limiter is kept as well.
 */
static int simplify_away(const char *path, int pathlen, const struct path_simplify *simplify)
{
	const struct path_simplify *entry;

	/* No limiter at all: never prune anything. */
	if (!simplify)
		return 0;

	for (entry = simplify; entry->path; entry++) {
		int cmplen = entry->len < pathlen ? entry->len : pathlen;

		if (memcmp(path, entry->path, cmplen) == 0)
			return 0;
	}

	/* Walked the whole table without a possible match. */
	return 1;
}

/*
* Read a directory tree. We currently ignore anything but
* directories, regular files and symlinks. That's because git
Expand All @@ -301,7 +331,7 @@ static int dir_exists(const char *dirname, int len)
* Also, we ignore the name ".git" (even if it is not a directory).
* That likely will not change.
*/
static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only)
static int read_directory_recursive(struct dir_struct *dir, const char *path, const char *base, int baselen, int check_only, const struct path_simplify *simplify)
{
DIR *fdir = opendir(path);
int contents = 0;
Expand All @@ -324,6 +354,8 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
continue;
len = strlen(de->d_name);
memcpy(fullname + baselen, de->d_name, len+1);
if (simplify_away(fullname, baselen + len, simplify))
continue;
if (excluded(dir, fullname) != dir->show_ignored) {
if (!dir->show_ignored || DTYPE(de) != DT_DIR) {
continue;
Expand All @@ -350,13 +382,13 @@ static int read_directory_recursive(struct dir_struct *dir, const char *path, co
if (dir->hide_empty_directories &&
!read_directory_recursive(dir,
fullname, fullname,
baselen + len, 1))
baselen + len, 1, simplify))
continue;
break;
}

contents += read_directory_recursive(dir,
fullname, fullname, baselen + len, 0);
fullname, fullname, baselen + len, 0, simplify);
continue;
case DT_REG:
case DT_LNK:
Expand Down Expand Up @@ -386,8 +418,61 @@ static int cmp_name(const void *p1, const void *p2)
e2->name, e2->len);
}

int read_directory(struct dir_struct *dir, const char *path, const char *base, int baselen)
/*
 * Return the length of the "simple" part of a path match limiter:
 * the number of leading bytes of "match" that come before the first
 * glob-special character ('?', '*', '[' or '\\'), or before the
 * terminating NUL if there is no such character.
 *
 * "match" must be a NUL-terminated string.
 */
static int simple_length(const char *match)
{
	/*
	 * strcspn() stops at the first byte from the reject set or at
	 * the NUL, which is exactly the scan we need; it also avoids
	 * rebuilding a 256-entry lookup table on every call.
	 */
	return (int)strcspn(match, "?*[\\");
}

/*
 * Build the early-pruning table for a NULL-terminated pathspec array.
 *
 * Returns NULL when "pathspec" is NULL.  Otherwise returns a newly
 * allocated array with one entry per pathspec element (pointing at
 * the caller's string, not a copy) followed by a terminating entry
 * whose path is NULL.  Release it with free_simplify().
 */
static struct path_simplify *create_simplify(const char **pathspec)
{
	struct path_simplify *table = NULL;
	int count = 0, capacity = 0;

	if (!pathspec)
		return NULL;

	for (;;) {
		const char *spec;

		/*
		 * Grow before looking at the next element so that the
		 * terminating entry always has a slot, too.
		 */
		if (count >= capacity) {
			capacity = alloc_nr(capacity);
			table = xrealloc(table, capacity * sizeof(*table));
		}

		spec = pathspec[count];
		if (!spec)
			break;

		table[count].path = spec;
		table[count].len = simple_length(spec);
		count++;
	}

	table[count].path = NULL;
	table[count].len = 0;
	return table;
}

/*
 * Release a table obtained from create_simplify().  Passing NULL is
 * allowed.  Only the table itself is freed; the path strings it
 * points at are left untouched.
 */
static void free_simplify(struct path_simplify *simplify)
{
	/* free(NULL) is defined to do nothing, so no guard is needed. */
	free(simplify);
}

int read_directory(struct dir_struct *dir, const char *path, const char *base, int baselen, const char **pathspec)
{
struct path_simplify *simplify = create_simplify(pathspec);

/*
* Make sure to do the per-directory exclude for all the
* directories leading up to our base.
Expand All @@ -414,7 +499,8 @@ int read_directory(struct dir_struct *dir, const char *path, const char *base, i
}
}

read_directory_recursive(dir, path, base, baselen, 0);
read_directory_recursive(dir, path, base, baselen, 0, simplify);
free_simplify(simplify);
qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
return dir->nr;
}
Expand Down
2 changes: 1 addition & 1 deletion dir.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ extern int common_prefix(const char **pathspec);
#define MATCHED_EXACTLY 3
extern int match_pathspec(const char **pathspec, const char *name, int namelen, int prefix, char *seen);

extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen);
extern int read_directory(struct dir_struct *, const char *path, const char *base, int baselen, const char **pathspec);
extern int push_exclude_per_directory(struct dir_struct *, const char *, int);
extern void pop_exclude_per_directory(struct dir_struct *, int);

Expand Down
2 changes: 1 addition & 1 deletion wt-status.c
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ static void wt_status_print_untracked(struct wt_status *s)
if (file_exists(x))
add_excludes_from_file(&dir, x);

read_directory(&dir, ".", "", 0);
read_directory(&dir, ".", "", 0, NULL);
for(i = 0; i < dir.nr; i++) {
/* check for matching entry, which is unmerged; lifted from
* builtin-ls-files:show_other_files */
Expand Down

0 comments on commit 9fc42d6

Please sign in to comment.