Skip to content

Commit

Permalink
Optimize diff-cache -p --cached
Browse files Browse the repository at this point in the history
This patch optimizes "diff-cache -p --cached" by not inflating
blobs into temporary files when the blob recorded in the
cache matches the corresponding file in the work tree.  The file
in the work tree is passed as the comparison source in such a
case instead.

This optimization kicks in only when we have already read the
cache, and this is deliberate.  In particular,
diff-tree does not use this code, because changes are contained
in a small number of files relative to the project size most of
the time, and reading the cache is so expensive for a large project
that the cost of reading it outweighs the savings from not
inflating blobs.

Also this patch cleans up the structure passed from diff clients
by removing one unused structure member.

Signed-off-by: Junio C Hamano <junkio@cox.net>
  • Loading branch information
Junio C Hamano committed May 4, 2005
1 parent 6fa2806 commit b46f0b6
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 24 deletions.
6 changes: 3 additions & 3 deletions diff-tree-helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ static int parse_oneside_change(const char *cp, struct diff_spec *one,
if (strncmp(cp, "\tblob\t", 6))
return -1;
cp += 6;
if (get_sha1_hex(cp, one->u.sha1))
if (get_sha1_hex(cp, one->blob_sha1))
return -1;
cp += 40;
if (*cp++ != '\t')
Expand Down Expand Up @@ -83,13 +83,13 @@ static int parse_diff_tree_output(const char *buf,
if (strncmp(cp, "\tblob\t", 6))
return -1;
cp += 6;
if (get_sha1_hex(cp, old.u.sha1))
if (get_sha1_hex(cp, old.blob_sha1))
return -1;
cp += 40;
if (strncmp(cp, "->", 2))
return -1;
cp += 2;
if (get_sha1_hex(cp, new.u.sha1))
if (get_sha1_hex(cp, new.blob_sha1))
return -1;
cp += 40;
if (*cp++ != '\t')
Expand Down
67 changes: 54 additions & 13 deletions diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,50 @@ static void builtin_diff(const char *name,
execlp("/bin/sh","sh", "-c", cmd, NULL);
}

/*
 * Decide whether the work tree file `name` can be used in place of
 * the blob `sha1`, so that prepare_temp_file() does not have to
 * inflate and extract the object.
 *
 * Returns 1 only when all of the following hold:
 *   - the dircache has already been read (active_cache is set),
 *   - it has an entry for `name`,
 *   - that entry records `sha1`, and
 *   - stat(name) succeeds and cache_match_stat() returns zero for
 *     the entry against that stat data.
 * Returns 0 in every other case.
 */
static int work_tree_matches(const char *name, const unsigned char *sha1)
{
	struct cache_entry *ce;
	struct stat st;
	int pos, len;

	/* We do not read the cache ourselves here, because the
	 * benchmark with a previous version that always read the cache
	 * showed that it makes things worse for diff-tree comparing
	 * two linux-2.6 kernel trees in an already checked out work
	 * tree.  Most diff-tree comparisons deal with only a small
	 * number of files, while reading the cache is expensive for a
	 * large project, and its cost outweighs the savings we get by
	 * not inflating the object to a temporary file.  Practically,
	 * this code only helps when we are used by diff-cache --cached,
	 * which does read the cache before calling us.
	 */
	if (!active_cache)
		return 0;

	len = strlen(name);
	pos = cache_name_pos(name, len);
	if (pos < 0)
		return 0;	/* negative position: no entry for this path */
	ce = active_cache[pos];

	/* Compare object names before touching the filesystem: this is
	 * a 20-byte memcmp, and when the entry names a different blob
	 * the stat() result could not change the answer anyway.
	 */
	if (memcmp(sha1, ce->sha1, 20))
		return 0;

	/* The entry names the right blob; now make sure the file on
	 * disk still agrees with what the cache recorded for it.
	 */
	if (stat(name, &st) < 0)
		return 0;
	if (cache_match_stat(ce, &st))
		return 0;
	return 1;
}

static void prepare_temp_file(const char *name,
struct diff_tempfile *temp,
struct diff_spec *one)
{
static unsigned char null_sha1[20] = { 0, };
int use_work_tree = 0;

if (!one->file_valid) {
not_a_valid_file:
Expand All @@ -150,20 +189,22 @@ static void prepare_temp_file(const char *name,
}

if (one->sha1_valid &&
!memcmp(one->u.sha1, null_sha1, sizeof(null_sha1))) {
one->sha1_valid = 0;
one->u.name = name;
}
(!memcmp(one->blob_sha1, null_sha1, sizeof(null_sha1)) ||
work_tree_matches(name, one->blob_sha1)))
use_work_tree = 1;

if (!one->sha1_valid) {
if (!one->sha1_valid || use_work_tree) {
struct stat st;
temp->name = one->u.name;
temp->name = name;
if (stat(temp->name, &st) < 0) {
if (errno == ENOENT)
goto not_a_valid_file;
die("stat(%s): %s", temp->name, strerror(errno));
}
strcpy(temp->hex, sha1_to_hex(null_sha1));
if (!one->sha1_valid)
strcpy(temp->hex, sha1_to_hex(null_sha1));
else
strcpy(temp->hex, sha1_to_hex(one->blob_sha1));
sprintf(temp->mode, "%06o",
S_IFREG |ce_permissions(st.st_mode));
}
Expand All @@ -173,10 +214,10 @@ static void prepare_temp_file(const char *name,
char type[20];
unsigned long size;

blob = read_sha1_file(one->u.sha1, type, &size);
blob = read_sha1_file(one->blob_sha1, type, &size);
if (!blob || strcmp(type, "blob"))
die("unable to read blob object for %s (%s)",
name, sha1_to_hex(one->u.sha1));
name, sha1_to_hex(one->blob_sha1));

strcpy(temp->tmp_path, ".diff_XXXXXX");
fd = mkstemp(temp->tmp_path);
Expand All @@ -187,7 +228,7 @@ static void prepare_temp_file(const char *name,
close(fd);
free(blob);
temp->name = temp->tmp_path;
strcpy(temp->hex, sha1_to_hex(one->u.sha1));
strcpy(temp->hex, sha1_to_hex(one->blob_sha1));
temp->hex[40] = 0;
sprintf(temp->mode, "%06o", one->mode);
}
Expand Down Expand Up @@ -286,7 +327,7 @@ void diff_addremove(int addremove, unsigned mode,
char concatpath[PATH_MAX];
struct diff_spec spec[2], *one, *two;

memcpy(spec[0].u.sha1, sha1, 20);
memcpy(spec[0].blob_sha1, sha1, 20);
spec[0].mode = mode;
spec[0].sha1_valid = spec[0].file_valid = 1;
spec[1].file_valid = 0;
Expand All @@ -311,9 +352,9 @@ void diff_change(unsigned old_mode, unsigned new_mode,
char concatpath[PATH_MAX];
struct diff_spec spec[2];

memcpy(spec[0].u.sha1, old_sha1, 20);
memcpy(spec[0].blob_sha1, old_sha1, 20);
spec[0].mode = old_mode;
memcpy(spec[1].u.sha1, new_sha1, 20);
memcpy(spec[1].blob_sha1, new_sha1, 20);
spec[1].mode = new_mode;
spec[0].sha1_valid = spec[0].file_valid = 1;
spec[1].sha1_valid = spec[1].file_valid = 1;
Expand Down
13 changes: 5 additions & 8 deletions diff.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,12 @@ extern void diff_unmerge(const char *path);
/* These are for diff-tree-helper */

struct diff_spec {
union {
const char *name; /* path on the filesystem */
unsigned char sha1[20]; /* blob object ID */
} u;
unsigned char blob_sha1[20];
unsigned short mode; /* file mode */
unsigned sha1_valid : 1; /* if true, use u.sha1 and trust mode.
* (however with a NULL SHA1, read them
* from the file!).
* if false, use u.name and read mode from
unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode;
* however with a NULL SHA1, read them
* from the file system.
* if false, use the name and read mode from
* the filesystem.
*/
unsigned file_valid : 1; /* if false the file does not even exist */
Expand Down

0 comments on commit b46f0b6

Please sign in to comment.