Skip to content

Commit

Permalink
Improve the safety check used in fetch.c
Browse files Browse the repository at this point in the history
The recent safety check to trust only the commits we have made
things impossibly slow and turn out to waste a lot of memory.

This commit fixes it with the following improvements:

 - mark already scanned objects and avoid rescanning the same
   object again;

 - free the tree entries when we have scanned the tree entries;
   this is the same as b0d8923
   which reduced memory usage by rev-list;

 - plug memory leak from the object_list dequeuing code;

 - use the process_queue not just for fetching but for scanning,
   to make things tail recursive to avoid deep recursion; the
   deep recursion was especially prominent when we cloned a big
   pack.

 - avoid has_sha1_file() call when we already know we do not have
   that object.

Signed-off-by: Junio C Hamano <junkio@cox.net>
  • Loading branch information
Junio C Hamano committed Sep 18, 2005
1 parent 490e23d commit 85d106c
Showing 1 changed file with 44 additions and 32 deletions.
76 changes: 44 additions & 32 deletions fetch.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,36 +33,33 @@ static void report_missing(const char *what, const unsigned char *missing)
what, missing_hex, sha1_to_hex(current_commit_sha1));
}

static int make_sure_we_have_it(const char *what, unsigned char *sha1)
{
int status = 0;

if (!has_sha1_file(sha1)) {
status = fetch(sha1);
if (status && what)
report_missing(what, sha1);
}
return status;
}

static int process(unsigned char *sha1, const char *type);

static int process_tree(struct tree *tree)
{
struct tree_entry_list *entries;
struct tree_entry_list *entry;

if (parse_tree(tree))
return -1;

for (entries = tree->entries; entries; entries = entries->next) {
if (process(entries->item.any->sha1,
entries->directory ? tree_type : blob_type))
entry = tree->entries;
tree->entries = NULL;
while (entry) {
struct tree_entry_list *next = entry->next;
if (process(entry->item.any->sha1,
entry->directory ? tree_type : blob_type))
return -1;
free(entry);
entry = next;
}
return 0;
}

#define COMPLETE 1U
#define TO_FETCH 2U
#define TO_SCAN 4U
#define SCANNED 8U

static struct commit_list *complete = NULL;

static int process_commit(struct commit *commit)
Expand All @@ -73,13 +70,14 @@ static int process_commit(struct commit *commit)
while (complete && complete->item->date >= commit->date) {
pop_most_recent_commit(&complete, COMPLETE);
}


if (commit->object.flags & COMPLETE)
return 0;

memcpy(current_commit_sha1, commit->object.sha1, 20);

pull_say("walk %s\n", sha1_to_hex(commit->object.sha1));

if (get_tree) {
if (process(commit->tree->object.sha1, tree_type))
return -1;
Expand All @@ -89,8 +87,7 @@ static int process_commit(struct commit *commit)
if (get_history) {
struct commit_list *parents = commit->parents;
for (; parents; parents = parents->next) {
if (process(parents->item->object.sha1,
commit_type))
if (process(parents->item->object.sha1, commit_type))
return -1;
}
}
Expand All @@ -109,6 +106,10 @@ static struct object_list **process_queue_end = &process_queue;

static int process_object(struct object *obj)
{
if (obj->flags & SCANNED)
return 0;
obj->flags |= SCANNED;

if (obj->type == commit_type) {
if (process_commit((struct commit *)obj))
return -1;
Expand Down Expand Up @@ -139,36 +140,47 @@ static int process(unsigned char *sha1, const char *type)
if (has_sha1_file(sha1)) {
parse_object(sha1);
/* We already have it, so we should scan it now. */
return process_object(obj);
if (obj->flags & (SCANNED | TO_SCAN))
return 0;
object_list_insert(obj, process_queue_end);
process_queue_end = &(*process_queue_end)->next;
obj->flags |= TO_SCAN;
return 0;
}
if (object_list_contains(process_queue, obj))
if (obj->flags & (COMPLETE | TO_FETCH))
return 0;
object_list_insert(obj, process_queue_end);
process_queue_end = &(*process_queue_end)->next;
obj->flags |= TO_FETCH;

//fprintf(stderr, "prefetch %s\n", sha1_to_hex(sha1));
prefetch(sha1);

return 0;
}

static int loop(void)
{
struct object_list *elem;

while (process_queue) {
struct object *obj = process_queue->item;
/*
fprintf(stderr, "%d objects to pull\n",
object_list_length(process_queue));
*/
process_queue = process_queue->next;
elem = process_queue;
process_queue = elem->next;
free(elem);
if (!process_queue)
process_queue_end = &process_queue;

//fprintf(stderr, "fetch %s\n", sha1_to_hex(obj->sha1));

if (make_sure_we_have_it(obj->type ? obj->type : "object",
obj->sha1))
return -1;
/* If we are not scanning this object, we placed it in
* the queue because we needed to fetch it first.
*/
if (! (obj->flags & TO_SCAN)) {
if (fetch(obj->sha1)) {
report_missing(obj->type
? obj->type
: "object", obj->sha1);
return -1;
}
}
if (!obj->type)
parse_object(obj->sha1);
if (process_object(obj))
Expand Down

0 comments on commit 85d106c

Please sign in to comment.