From fe1b22686f26bed3047294cc4552e50ce58fa954 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:33:13 -0400 Subject: [PATCH 01/28] foreach_alt_odb: propagate return value from callback We check the return value of the callback and stop iterating if it is non-zero. However, we do not make the non-zero return value available to the caller, so they have no way of knowing whether the operation succeeded or not (technically they can keep their own error flag in the callback data, but that is unlike our other for_each functions). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 2 +- sha1_file.c | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cache.h b/cache.h index 3e6a914db..de290c4f0 100644 --- a/cache.h +++ b/cache.h @@ -1143,7 +1143,7 @@ extern void prepare_alt_odb(void); extern void read_info_alternates(const char * relative_base, int depth); extern void add_to_alternates_file(const char *reference); typedef int alt_odb_fn(struct alternate_object_database *, void *); -extern void foreach_alt_odb(alt_odb_fn, void*); +extern int foreach_alt_odb(alt_odb_fn, void*); struct pack_window { struct pack_window *next; diff --git a/sha1_file.c b/sha1_file.c index 6f18c22ab..aaa3c5286 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -412,14 +412,18 @@ void add_to_alternates_file(const char *reference) link_alt_odb_entries(alt, strlen(alt), '\n', NULL, 0); } -void foreach_alt_odb(alt_odb_fn fn, void *cb) +int foreach_alt_odb(alt_odb_fn fn, void *cb) { struct alternate_object_database *ent; + int r = 0; prepare_alt_odb(); - for (ent = alt_odb_list; ent; ent = ent->next) - if (fn(ent, cb)) - return; + for (ent = alt_odb_list; ent; ent = ent->next) { + r = fn(ent, cb); + if (r) + break; + } + return r; } void prepare_alt_odb(void) From 50a71776ab14c63c72c86e3ce1529052bcb2634a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:34:05 -0400 Subject: [PATCH 02/28] isxdigit: cast input to unsigned char Otherwise, callers must do so or risk triggering warnings -Wchar-subscript (and rightfully so; a signed char might cause us to use a bogus negative index into the hexval_table). While we are dropping the now-unnecessary casts from the caller in urlmatch.c, we can get rid of similar casts in actually parsing the hex by using the hexval() helper, which implicitly casts to unsigned (but note that we cannot implement isxdigit in terms of hexval(), as it also casts its return value to unsigned). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-compat-util.h | 2 +- urlmatch.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/git-compat-util.h b/git-compat-util.h index fb41118c0..44890d5b1 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -677,7 +677,7 @@ extern const unsigned char sane_ctype[256]; #define iscntrl(x) (sane_istest(x,GIT_CNTRL)) #define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \ GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC) -#define isxdigit(x) (hexval_table[x] != -1) +#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1) #define tolower(x) sane_case((unsigned char)(x), 0x20) #define toupper(x) sane_case((unsigned char)(x), 0) #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC) diff --git a/urlmatch.c b/urlmatch.c index 3d4c54b5c..618d21649 100644 --- a/urlmatch.c +++ b/urlmatch.c @@ -43,11 +43,11 @@ static int append_normalized_escapes(struct strbuf *buf, from_len--; if (ch == '%') { if (from_len < 2 || - !isxdigit((unsigned char)from[0]) || - !isxdigit((unsigned char)from[1])) + !isxdigit(from[0]) || + !isxdigit(from[1])) return 0; - ch = hexval_table[(unsigned char)*from++] << 4; - ch |= hexval_table[(unsigned char)*from++]; + ch = hexval(*from++) << 4; + ch |= hexval(*from++); from_len -= 2; was_esc = 1; } From 68f492359e29bbdf633201406d0646deee2b298c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:34:19 -0400 Subject: [PATCH 03/28] object_array: factor out slopbuf-freeing logic This is not a lot of code, but it's a logical construct that should not need to be repeated (and we are about to add a third repetition). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/object.c b/object.c index ca9d790f4..60f486463 100644 --- a/object.c +++ b/object.c @@ -355,6 +355,16 @@ void add_object_array_with_context(struct object *obj, const char *name, struct add_object_array_with_mode_context(obj, name, array, S_IFINVALID, context); } +/* + * Free all memory associated with an entry; the result is + * in an unspecified state and should not be examined. + */ +static void object_array_release_entry(struct object_array_entry *ent) +{ + if (ent->name != object_array_slopbuf) + free(ent->name); +} + void object_array_filter(struct object_array *array, object_array_each_func_t want, void *cb_data) { @@ -367,8 +377,7 @@ void object_array_filter(struct object_array *array, objects[dst] = objects[src]; dst++; } else { - if (objects[src].name != object_array_slopbuf) - free(objects[src].name); + object_array_release_entry(&objects[src]); } } array->nr = dst; @@ -400,8 +409,7 @@ void object_array_remove_duplicates(struct object_array *array) objects[array->nr] = objects[src]; array->nr++; } else { - if (objects[src].name != object_array_slopbuf) - free(objects[src].name); + object_array_release_entry(&objects[src]); } } } From 46be823124bb6a6ff0e06dc19c327b599ed97c72 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:34:34 -0400 Subject: [PATCH 04/28] object_array: add a "clear" function There's currently no easy way to free the memory associated with an object_array (and in most cases, we simply leak the memory in a rev_info's pending array). Let's provide a helper to make this easier to handle. We can make use of it in list-objects.c, which does the same thing by hand (but fails to free the "name" field of each entry, potentially leaking memory). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects.c | 7 +------ object.c | 10 ++++++++++ object.h | 6 ++++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/list-objects.c b/list-objects.c index 3595ee7a2..fad6808aa 100644 --- a/list-objects.c +++ b/list-objects.c @@ -228,11 +228,6 @@ void traverse_commit_list(struct rev_info *revs, die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); } - if (revs->pending.nr) { - free(revs->pending.objects); - revs->pending.nr = 0; - revs->pending.alloc = 0; - revs->pending.objects = NULL; - } + object_array_clear(&revs->pending); strbuf_release(&base); } diff --git a/object.c b/object.c index 60f486463..6aeb1bbbe 100644 --- a/object.c +++ b/object.c @@ -383,6 +383,16 @@ void object_array_filter(struct object_array *array, array->nr = dst; } +void object_array_clear(struct object_array *array) +{ + int i; + for (i = 0; i < array->nr; i++) + object_array_release_entry(&array->objects[i]); + free(array->objects); + array->objects = NULL; + array->nr = array->alloc = 0; +} + /* * Return true iff array already contains an entry with name. */ diff --git a/object.h b/object.h index e028ced74..2a755a237 100644 --- a/object.h +++ b/object.h @@ -133,6 +133,12 @@ void object_array_filter(struct object_array *array, */ void object_array_remove_duplicates(struct object_array *array); +/* + * Remove any objects from the array, freeing all used memory; afterwards + * the array is ready to store more objects with add_object_array(). + */ +void object_array_clear(struct object_array *array); + void clear_object_flags(unsigned flags); #endif /* OBJECT_H */ From 1da1e07c835e900337714cfad6c32a8dc0b36ac3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:35:12 -0400 Subject: [PATCH 05/28] clean up name allocation in prepare_revision_walk When we enter prepare_revision_walk, we have zero or more entries in our "pending" array. We disconnect that array from the rev_info, and then process each entry: 1. If the entry is a commit and the --source option is in effect, we keep a pointer to the object name. 2. Otherwise, we re-add the item to the pending list with a blank name. We then throw away the old array by freeing the array itself, but do not touch the "name" field of each entry. For any items of type (2), we leak the memory associated with the name. This commit fixes that by calling object_array_clear, which handles the cleanup for us. That breaks (1), though, because it depends on the memory pointed to by the name to last forever. We can solve that by making a copy of the name. This is slightly less efficient, but it shouldn't matter in practice, as we do it only for the tip commits of the traversal. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- revision.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/revision.c b/revision.c index e498b7c33..01cc276fa 100644 --- a/revision.c +++ b/revision.c @@ -300,7 +300,7 @@ static struct commit *handle_commit(struct rev_info *revs, revs->limited = 1; } if (revs->show_source && !commit->util) - commit->util = (void *) name; + commit->util = xstrdup(name); return commit; } @@ -2656,15 +2656,16 @@ void reset_revision_walk(void) int prepare_revision_walk(struct rev_info *revs) { - int nr = revs->pending.nr; - struct object_array_entry *e, *list; + int i; + struct object_array old_pending; struct commit_list **next = &revs->commits; - e = list = revs->pending.objects; + memcpy(&old_pending, &revs->pending, sizeof(old_pending)); revs->pending.nr = 0; revs->pending.alloc = 0; revs->pending.objects = NULL; - while (--nr >= 0) { + for (i = 0; i < old_pending.nr; i++) { + struct object_array_entry *e = old_pending.objects + i; struct commit *commit = handle_commit(revs, e->item, e->name); if (commit) { if (!(commit->object.flags & SEEN)) { @@ -2672,10 +2673,9 @@ int prepare_revision_walk(struct rev_info *revs) next = commit_list_append(commit, next); } } - e++; } if (!revs->leak_pending) - free(list); + object_array_clear(&old_pending); /* Signal whether we need per-parent treesame decoration */ if (revs->simplify_merges || From 5f78a431ab222189b11a9233a5902db61aa32976 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:37:28 -0400 Subject: [PATCH 06/28] reachable: use traverse_commit_list instead of custom walk To find the set of reachable objects, we add a bunch of possible sources to our rev_info, call prepare_revision_walk, and then launch into a custom walker that handles each object top. This is a subset of what traverse_commit_list does, so we can just reuse that code (it can also handle more complex cases like UNINTERESTING commits and pathspecs, but we don't use those features). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- reachable.c | 130 +++++++--------------------------------------------- 1 file changed, 17 insertions(+), 113 deletions(-) diff --git a/reachable.c b/reachable.c index 6f6835bf2..02bf6c2fe 100644 --- a/reachable.c +++ b/reachable.c @@ -8,6 +8,7 @@ #include "reachable.h" #include "cache-tree.h" #include "progress.h" +#include "list-objects.h" struct connectivity_progress { struct progress *progress; @@ -21,118 +22,6 @@ static void update_progress(struct connectivity_progress *cp) display_progress(cp->progress, cp->count); } -static void process_blob(struct blob *blob, - struct object_array *p, - struct name_path *path, - const char *name, - struct connectivity_progress *cp) -{ - struct object *obj = &blob->object; - - if (!blob) - die("bad blob object"); - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - update_progress(cp); - /* Nothing to do, really .. The blob lookup was the important part */ -} - -static void process_gitlink(const unsigned char *sha1, - struct object_array *p, - struct name_path *path, - const char *name) -{ - /* I don't think we want to recurse into this, really. */ -} - -static void process_tree(struct tree *tree, - struct object_array *p, - struct name_path *path, - const char *name, - struct connectivity_progress *cp) -{ - struct object *obj = &tree->object; - struct tree_desc desc; - struct name_entry entry; - struct name_path me; - - if (!tree) - die("bad tree object"); - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - update_progress(cp); - if (parse_tree(tree) < 0) - die("bad tree object %s", sha1_to_hex(obj->sha1)); - add_object(obj, p, path, name); - me.up = path; - me.elem = name; - me.elem_len = strlen(name); - - init_tree_desc(&desc, tree->buffer, tree->size); - - while (tree_entry(&desc, &entry)) { - if (S_ISDIR(entry.mode)) - process_tree(lookup_tree(entry.sha1), p, &me, entry.path, cp); - else if (S_ISGITLINK(entry.mode)) - process_gitlink(entry.sha1, p, &me, entry.path); - else - process_blob(lookup_blob(entry.sha1), p, &me, entry.path, cp); - } - free_tree_buffer(tree); -} - -static void process_tag(struct tag *tag, struct object_array *p, - const char *name, struct connectivity_progress *cp) -{ - struct object *obj = &tag->object; - - if (obj->flags & SEEN) - return; - obj->flags |= SEEN; - update_progress(cp); - - if (parse_tag(tag) < 0) - die("bad tag object %s", sha1_to_hex(obj->sha1)); - if (tag->tagged) - add_object(tag->tagged, p, NULL, name); -} - -static void walk_commit_list(struct rev_info *revs, - struct connectivity_progress *cp) -{ - int i; - struct commit *commit; - struct object_array objects = OBJECT_ARRAY_INIT; - - /* Walk all commits, process their trees */ - while ((commit = get_revision(revs)) != NULL) { - process_tree(commit->tree, &objects, NULL, "", cp); - update_progress(cp); - } - - /* Then walk all the pending objects, recursively processing them too */ - for (i = 0; i < revs->pending.nr; i++) { - struct object_array_entry *pending = revs->pending.objects + i; - struct object *obj = pending->item; - const char *name = pending->name; - if (obj->type == OBJ_TAG) { - process_tag((struct tag *) obj, &objects, name, cp); - continue; - } - if (obj->type == OBJ_TREE) { - process_tree((struct tree *)obj, &objects, NULL, name, cp); - continue; - } - if (obj->type == OBJ_BLOB) { - process_blob((struct blob *)obj, &objects, NULL, name, cp); - continue; - } - die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name); - } -} - static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1, const char *email, unsigned long timestamp, int tz, const char *message, void *cb_data) @@ -210,6 +99,21 @@ static void add_cache_refs(struct rev_info *revs) add_cache_tree(active_cache_tree, revs); } +/* + * The traversal will have already marked us as SEEN, so we + * only need to handle any progress reporting here. + */ +static void mark_object(struct object *obj, const struct name_path *path, + const char *name, void *data) +{ + update_progress(data); +} + +static void mark_commit(struct commit *c, void *data) +{ + mark_object(&c->object, NULL, NULL, data); +} + void mark_reachable_objects(struct rev_info *revs, int mark_reflog, struct progress *progress) { @@ -245,6 +149,6 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, */ if (prepare_revision_walk(revs)) die("revision walk setup failed"); - walk_commit_list(revs, &cp); + traverse_commit_list(revs, mark_commit, mark_object, &cp); display_progress(cp.progress, cp.count); } From 718ccc9731c4e98b123436c22c1cccf2beed5e29 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:38:31 -0400 Subject: [PATCH 07/28] reachable: reuse revision.c "add all reflogs" code We want to add all reflog entries as tips for finding reachable objects. The revision machinery can already do this (to support "rev-list --reflog"); we can reuse that code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- reachable.c | 24 +----------------------- revision.c | 4 ++-- revision.h | 1 + 3 files changed, 4 insertions(+), 25 deletions(-) diff --git a/reachable.c b/reachable.c index 02bf6c2fe..4e68cfadb 100644 --- a/reachable.c +++ b/reachable.c @@ -22,22 +22,6 @@ static void update_progress(struct connectivity_progress *cp) display_progress(cp->progress, cp->count); } -static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1, - const char *email, unsigned long timestamp, int tz, - const char *message, void *cb_data) -{ - struct object *object; - struct rev_info *revs = (struct rev_info *)cb_data; - - object = parse_object(osha1); - if (object) - add_pending_object(revs, object, ""); - object = parse_object(nsha1); - if (object) - add_pending_object(revs, object, ""); - return 0; -} - static int add_one_ref(const char *path, const unsigned char *sha1, int flag, void *cb_data) { struct object *object = parse_object_or_die(sha1, path); @@ -48,12 +32,6 @@ static int add_one_ref(const char *path, const unsigned char *sha1, int flag, vo return 0; } -static int add_one_reflog(const char *path, const unsigned char *sha1, int flag, void *cb_data) -{ - for_each_reflog_ent(path, add_one_reflog_ent, cb_data); - return 0; -} - static void add_one_tree(const unsigned char *sha1, struct rev_info *revs) { struct tree *tree = lookup_tree(sha1); @@ -138,7 +116,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, /* Add all reflog info */ if (mark_reflog) - for_each_reflog(add_one_reflog, revs); + add_reflogs_to_pending(revs, 0); cp.progress = progress; cp.count = 0; diff --git a/revision.c b/revision.c index 01cc276fa..b8e02e279 100644 --- a/revision.c +++ b/revision.c @@ -1275,7 +1275,7 @@ static int handle_one_reflog(const char *path, const unsigned char *sha1, int fl return 0; } -static void handle_reflog(struct rev_info *revs, unsigned flags) +void add_reflogs_to_pending(struct rev_info *revs, unsigned flags) { struct all_refs_cb cb; cb.all_revs = revs; @@ -2061,7 +2061,7 @@ static int handle_revision_pseudo_opt(const char *submodule, for_each_glob_ref_in(handle_one_ref, arg + 10, "refs/remotes/", &cb); clear_ref_exclusion(&revs->ref_excludes); } else if (!strcmp(arg, "--reflog")) { - handle_reflog(revs, *flags); + add_reflogs_to_pending(revs, *flags); } else if (!strcmp(arg, "--not")) { *flags ^= UNINTERESTING | BOTTOM; } else if (!strcmp(arg, "--no-walk")) { diff --git a/revision.h b/revision.h index a6205307c..e64404417 100644 --- a/revision.h +++ b/revision.h @@ -276,6 +276,7 @@ extern void add_pending_sha1(struct rev_info *revs, unsigned int flags); extern void add_head_to_pending(struct rev_info *); +extern void add_reflogs_to_pending(struct rev_info *, unsigned int flags); enum commit_action { commit_ignore, From 27e1e22d5ee3005f228b67ea94b5af29547b54fe Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:38:55 -0400 Subject: [PATCH 08/28] prune: factor out loose-object directory traversal Prune has to walk $GIT_DIR/objects/?? in order to find the set of loose objects to prune. Other parts of the code (e.g., count-objects) want to do the same. Let's factor it out into a reusable for_each-style function. Note that this is not quite a straight code movement. The original code had strange behavior when it found a file of the form "[0-9a-f]{2}/.{38}" that did _not_ contain all hex digits. It executed a "break" from the loop, meaning that we stopped pruning in that directory (but still pruned other directories!). This was probably a bug; we do not want to process the file as an object, but we should keep going otherwise (and that is how the new code handles it). We are also a little more careful with loose object directories which fail to open. The original code silently ignored any failures, but the new code will complain about any problems besides ENOENT. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/prune.c | 87 +++++++++++++++---------------------------------- cache.h | 33 +++++++++++++++++++ sha1_file.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 61 deletions(-) diff --git a/builtin/prune.c b/builtin/prune.c index 144a3bdb3..763f53e06 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath) return 0; } -static int prune_object(const char *fullpath, const unsigned char *sha1) +static int prune_object(const unsigned char *sha1, const char *fullpath, + void *data) { struct stat st; - if (lstat(fullpath, &st)) - return error("Could not stat '%s'", fullpath); + + /* + * Do we know about this object? + * It must have been reachable + */ + if (lookup_object(sha1)) + return 0; + + if (lstat(fullpath, &st)) { + /* report errors, but do not stop pruning */ + error("Could not stat '%s'", fullpath); + return 0; + } if (st.st_mtime > expire) return 0; if (show_only || verbose) { @@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1) return 0; } -static int prune_dir(int i, struct strbuf *path) +static int prune_cruft(const char *basename, const char *path, void *data) { - size_t baselen = path->len; - DIR *dir = opendir(path->buf); - struct dirent *de; - - if (!dir) - return 0; - - while ((de = readdir(dir)) != NULL) { - char name[100]; - unsigned char sha1[20]; - - if (is_dot_or_dotdot(de->d_name)) - continue; - if (strlen(de->d_name) == 38) { - sprintf(name, "%02x", i); - memcpy(name+2, de->d_name, 39); - if (get_sha1_hex(name, sha1) < 0) - break; - - /* - * Do we know about this object? - * It must have been reachable - */ - if (lookup_object(sha1)) - continue; - - strbuf_addf(path, "/%s", de->d_name); - prune_object(path->buf, sha1); - strbuf_setlen(path, baselen); - continue; - } - if (starts_with(de->d_name, "tmp_obj_")) { - strbuf_addf(path, "/%s", de->d_name); - prune_tmp_file(path->buf); - strbuf_setlen(path, baselen); - continue; - } - fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name); - } - closedir(dir); - if (!show_only) - rmdir(path->buf); + if (starts_with(basename, "tmp_obj_")) + prune_tmp_file(path); + else + fprintf(stderr, "bad sha1 file: %s\n", path); return 0; } -static void prune_object_dir(const char *path) +static int prune_subdir(int nr, const char *path, void *data) { - struct strbuf buf = STRBUF_INIT; - size_t baselen; - int i; - - strbuf_addstr(&buf, path); - strbuf_addch(&buf, '/'); - baselen = buf.len; - - for (i = 0; i < 256; i++) { - strbuf_addf(&buf, "%02x", i); - prune_dir(i, &buf); - strbuf_setlen(&buf, baselen); - } + if (!show_only) + rmdir(path); + return 0; } /* @@ -173,7 +137,8 @@ int cmd_prune(int argc, const char **argv, const char *prefix) mark_reachable_objects(&revs, 1, progress); stop_progress(&progress); - prune_object_dir(get_object_directory()); + for_each_loose_file_in_objdir(get_object_directory(), prune_object, + prune_cruft, prune_subdir, NULL); prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0); remove_temporary_files(get_object_directory()); diff --git a/cache.h b/cache.h index de290c4f0..bdfbbcf79 100644 --- a/cache.h +++ b/cache.h @@ -1239,6 +1239,39 @@ extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsig extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *); +/* + * Iterate over the files in the loose-object parts of the object + * directory "path", triggering the following callbacks: + * + * - loose_object is called for each loose object we find. + * + * - loose_cruft is called for any files that do not appear to be + * loose objects. Note that we only look in the loose object + * directories "objects/[0-9a-f]{2}/", so we will not report + * "objects/foobar" as cruft. + * + * - loose_subdir is called for each top-level hashed subdirectory + * of the object directory (e.g., "$OBJDIR/f0"). It is called + * after the objects in the directory are processed. + * + * Any callback that is NULL will be ignored. Callbacks returning non-zero + * will end the iteration. + */ +typedef int each_loose_object_fn(const unsigned char *sha1, + const char *path, + void *data); +typedef int each_loose_cruft_fn(const char *basename, + const char *path, + void *data); +typedef int each_loose_subdir_fn(int nr, + const char *path, + void *data); +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); + struct object_info { /* Request */ enum object_type *typep; diff --git a/sha1_file.c b/sha1_file.c index aaa3c5286..fa08475c9 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3264,3 +3264,87 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect) die("%s is not a valid '%s' object", sha1_to_hex(sha1), typename(expect)); } + +static int for_each_file_in_obj_subdir(int subdir_nr, + struct strbuf *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + size_t baselen = path->len; + DIR *dir = opendir(path->buf); + struct dirent *de; + int r = 0; + + if (!dir) { + if (errno == ENOENT) + return 0; + return error("unable to open %s: %s", path->buf, strerror(errno)); + } + + while ((de = readdir(dir))) { + if (is_dot_or_dotdot(de->d_name)) + continue; + + strbuf_setlen(path, baselen); + strbuf_addf(path, "/%s", de->d_name); + + if (strlen(de->d_name) == 38) { + char hex[41]; + unsigned char sha1[20]; + + snprintf(hex, sizeof(hex), "%02x%s", + subdir_nr, de->d_name); + if (!get_sha1_hex(hex, sha1)) { + if (obj_cb) { + r = obj_cb(sha1, path->buf, data); + if (r) + break; + } + continue; + } + } + + if (cruft_cb) { + r = cruft_cb(de->d_name, path->buf, data); + if (r) + break; + } + } + strbuf_setlen(path, baselen); + + if (!r && subdir_cb) + r = subdir_cb(subdir_nr, path->buf, data); + + closedir(dir); + return r; +} + +int for_each_loose_file_in_objdir(const char *path, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) +{ + struct strbuf buf = STRBUF_INIT; + size_t baselen; + int r = 0; + int i; + + strbuf_addstr(&buf, path); + strbuf_addch(&buf, '/'); + baselen = buf.len; + + for (i = 0; i < 256; i++) { + strbuf_addf(&buf, "%02x", i); + r = for_each_file_in_obj_subdir(i, &buf, obj_cb, cruft_cb, + subdir_cb, data); + strbuf_setlen(&buf, baselen); + if (r) + break; + } + + strbuf_release(&buf); + return r; +} From 3725427945657da052f2f833f5fd4616c949be3e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:40:37 -0400 Subject: [PATCH 09/28] reachable: mark index blobs as SEEN When we mark all reachable objects for pruning, that includes blobs mentioned by the index. However, we do not mark these with the SEEN flag, as we do for objects that we find by traversing (we also do not add them to the pending list, but that is because there is nothing further to traverse with them). This doesn't cause any problems with prune, because it checks only that the object exists in the global object hash, and not its flags. However, let's mark these objects to be consistent and avoid any later surprises. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- reachable.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/reachable.c b/reachable.c index 4e68cfadb..d03f8294e 100644 --- a/reachable.c +++ b/reachable.c @@ -55,6 +55,8 @@ static void add_cache_refs(struct rev_info *revs) read_cache(); for (i = 0; i < active_nr; i++) { + struct blob *blob; + /* * The index can contain blobs and GITLINKs, GITLINKs are hashes * that don't actually point to objects in the repository, it's @@ -65,7 +67,10 @@ static void add_cache_refs(struct rev_info *revs) if (S_ISGITLINK(active_cache[i]->ce_mode)) continue; - lookup_blob(active_cache[i]->sha1); + blob = lookup_blob(active_cache[i]->sha1); + if (blob) + blob->object.flags |= SEEN; + /* * We could add the blobs to the pending list, but quite * frankly, we don't care. Once we've looked them up, and From 0d3b729680e9cab0f567122e796dab2bc9bc8cfb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:40:53 -0400 Subject: [PATCH 10/28] prune-packed: use for_each_loose_file_in_objdir This saves us from manually traversing the directory structure ourselves. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/prune-packed.c | 69 ++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index d430731d7..f24a2c2bd 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -10,65 +10,42 @@ static const char * const prune_packed_usage[] = { static struct progress *progress; -static void prune_dir(int i, DIR *dir, struct strbuf *pathname, int opts) +static int prune_subdir(int nr, const char *path, void *data) { - struct dirent *de; - char hex[40]; - int top_len = pathname->len; + int *opts = data; + display_progress(progress, nr + 1); + if (!(*opts & PRUNE_PACKED_DRY_RUN)) + rmdir(path); + return 0; +} + +static int prune_object(const unsigned char *sha1, const char *path, + void *data) +{ + int *opts = data; - sprintf(hex, "%02x", i); - while ((de = readdir(dir)) != NULL) { - unsigned char sha1[20]; - if (strlen(de->d_name) != 38) - continue; - memcpy(hex + 2, de->d_name, 38); - if (get_sha1_hex(hex, sha1)) - continue; - if (!has_sha1_pack(sha1)) - continue; + if (!has_sha1_pack(sha1)) + return 0; - strbuf_add(pathname, de->d_name, 38); - if (opts & PRUNE_PACKED_DRY_RUN) - printf("rm -f %s\n", pathname->buf); - else - unlink_or_warn(pathname->buf); - display_progress(progress, i + 1); - strbuf_setlen(pathname, top_len); - } + if (*opts & PRUNE_PACKED_DRY_RUN) + printf("rm -f %s\n", path); + else + unlink_or_warn(path); + return 0; } void prune_packed_objects(int opts) { - int i; - const char *dir = get_object_directory(); - struct strbuf pathname = STRBUF_INIT; - int top_len; - - strbuf_addstr(&pathname, dir); if (opts & PRUNE_PACKED_VERBOSE) progress = start_progress_delay(_("Removing duplicate objects"), 256, 95, 2); - if (pathname.len && pathname.buf[pathname.len - 1] != '/') - strbuf_addch(&pathname, '/'); - - top_len = pathname.len; - for (i = 0; i < 256; i++) { - DIR *d; + for_each_loose_file_in_objdir(get_object_directory(), + prune_object, NULL, prune_subdir, &opts); - display_progress(progress, i + 1); - strbuf_setlen(&pathname, top_len); - strbuf_addf(&pathname, "%02x/", i); - d = opendir(pathname.buf); - if (!d) - continue; - prune_dir(i, d, &pathname, opts); - closedir(d); - strbuf_setlen(&pathname, top_len + 2); - rmdir(pathname.buf); - } + /* Ensure we show 100% before finishing progress */ + display_progress(progress, 256); stop_progress(&progress); - strbuf_release(&pathname); } int cmd_prune_packed(int argc, const char **argv, const char *prefix) From cac05d4dfd148071462939a61ecd44cf932a0b02 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:40:58 -0400 Subject: [PATCH 11/28] count-objects: do not use xsize_t when counting object size The point of xsize_t is to safely cast an off_t into a size_t (because we are about to mmap). But in count-objects, we are summing the sizes in an off_t. Using xsize_t means that count-objects could fail on a 32-bit system with a 4G object (not likely, as other parts of git would fail, but we should at least be correct here). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/count-objects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index a7f70cb85..316a805a8 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -53,7 +53,7 @@ static void count_objects(DIR *d, char *path, int len, int verbose, if (lstat(path, &st) || !S_ISREG(st.st_mode)) bad = 1; else - (*loose_size) += xsize_t(on_disk_bytes(st)); + (*loose_size) += on_disk_bytes(st); } if (bad) { if (verbose) { From 4a1e693a30c816fa5293c5f9e83e1e98ee87584e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:41:11 -0400 Subject: [PATCH 12/28] count-objects: use for_each_loose_file_in_objdir This drops our line count considerably, and should make things more readable by keeping the counting logic separate from the traversal. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/count-objects.c | 101 ++++++++++++---------------------------- 1 file changed, 30 insertions(+), 71 deletions(-) diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 316a805a8..e47ef0b1a 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -11,6 +11,9 @@ static unsigned long garbage; static off_t size_garbage; +static int verbose; +static unsigned long loose, packed, packed_loose; +static off_t loose_size; static void real_report_garbage(const char *desc, const char *path) { @@ -21,61 +24,31 @@ static void real_report_garbage(const char *desc, const char *path) garbage++; } -static void count_objects(DIR *d, char *path, int len, int verbose, - unsigned long *loose, - off_t *loose_size, - unsigned long *packed_loose) +static void loose_garbage(const char *path) { - struct dirent *ent; - while ((ent = readdir(d)) != NULL) { - char hex[41]; - unsigned char sha1[20]; - const char *cp; - int bad = 0; + if (verbose) + report_garbage("garbage found", path); +} - if (is_dot_or_dotdot(ent->d_name)) - continue; - for (cp = ent->d_name; *cp; cp++) { - int ch = *cp; - if (('0' <= ch && ch <= '9') || - ('a' <= ch && ch <= 'f')) - continue; - bad = 1; - break; - } - if (cp - ent->d_name != 38) - bad = 1; - else { - struct stat st; - memcpy(path + len + 3, ent->d_name, 38); - path[len + 2] = '/'; - path[len + 41] = 0; - if (lstat(path, &st) || !S_ISREG(st.st_mode)) - bad = 1; - else - (*loose_size) += on_disk_bytes(st); - } - if (bad) { - if (verbose) { - struct strbuf sb = STRBUF_INIT; - strbuf_addf(&sb, "%.*s/%s", - len + 2, path, ent->d_name); - report_garbage("garbage found", sb.buf); - strbuf_release(&sb); - } - continue; - } - (*loose)++; - if (!verbose) - continue; - memcpy(hex, path+len, 2); - memcpy(hex+2, ent->d_name, 38); - hex[40] = 0; - if (get_sha1_hex(hex, sha1)) - die("internal error"); - if (has_sha1_pack(sha1)) - (*packed_loose)++; +static int count_loose(const unsigned char *sha1, const char *path, void *data) +{ + struct stat st; + + if (lstat(path, &st) || !S_ISREG(st.st_mode)) + loose_garbage(path); + else { + loose_size += on_disk_bytes(st); + loose++; + if (verbose && has_sha1_pack(sha1)) + packed_loose++; } + return 0; +} + +static int count_cruft(const char *basename, const char *path, void *data) +{ + loose_garbage(path); + return 0; } static char const * const count_objects_usage[] = { @@ -85,12 +58,7 @@ static char const * const count_objects_usage[] = { int cmd_count_objects(int argc, const char **argv, const char *prefix) { - int i, verbose = 0, human_readable = 0; - const char *objdir = get_object_directory(); - int len = strlen(objdir); - char *path = xmalloc(len + 50); - unsigned long loose = 0, packed = 0, packed_loose = 0; - off_t loose_size = 0; + int human_readable = 0; struct option opts[] = { OPT__VERBOSE(&verbose, N_("be verbose")), OPT_BOOL('H', "human-readable", &human_readable, @@ -104,19 +72,10 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) usage_with_options(count_objects_usage, opts); if (verbose) report_garbage = real_report_garbage; - memcpy(path, objdir, len); - if (len && objdir[len-1] != '/') - path[len++] = '/'; - for (i = 0; i < 256; i++) { - DIR *d; - sprintf(path + len, "%02x", i); - d = opendir(path); - if (!d) - continue; - count_objects(d, path, len, verbose, - &loose, &loose_size, &packed_loose); - closedir(d); - } + + for_each_loose_file_in_objdir(get_object_directory(), + count_loose, count_cruft, NULL, NULL); + if (verbose) { struct packed_git *p; unsigned long num_pack = 0; From 660c889e46d185dc98ba78963528826728b0a55d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:41:21 -0400 Subject: [PATCH 13/28] sha1_file: add for_each iterators for loose and packed objects We typically iterate over the reachable objects in a repository by starting at the tips and walking the graph. There's no easy way to iterate over all of the objects, including unreachable ones. Let's provide a way of doing so. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 11 ++++++++++ sha1_file.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/cache.h b/cache.h index bdfbbcf79..51ee856ac 100644 --- a/cache.h +++ b/cache.h @@ -1272,6 +1272,17 @@ int for_each_loose_file_in_objdir(const char *path, each_loose_subdir_fn subdir_cb, void *data); +/* + * Iterate over loose and packed objects in both the local + * repository and any alternates repositories. + */ +typedef int each_packed_object_fn(const unsigned char *sha1, + struct packed_git *pack, + uint32_t pos, + void *data); +extern int for_each_loose_object(each_loose_object_fn, void *); +extern int for_each_packed_object(each_packed_object_fn, void *); + struct object_info { /* Request */ enum object_type *typep; diff --git a/sha1_file.c b/sha1_file.c index fa08475c9..55c65b7ef 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -3348,3 +3348,65 @@ int for_each_loose_file_in_objdir(const char *path, strbuf_release(&buf); return r; } + +struct loose_alt_odb_data { + each_loose_object_fn *cb; + void *data; +}; + +static int loose_from_alt_odb(struct alternate_object_database *alt, + void *vdata) +{ + struct loose_alt_odb_data *data = vdata; + return for_each_loose_file_in_objdir(alt->base, + data->cb, NULL, NULL, + data->data); +} + +int for_each_loose_object(each_loose_object_fn cb, void *data) +{ + struct loose_alt_odb_data alt; + int r; + + r = for_each_loose_file_in_objdir(get_object_directory(), + cb, NULL, NULL, data); + if (r) + return r; + + alt.cb = cb; + alt.data = data; + return foreach_alt_odb(loose_from_alt_odb, &alt); +} + +static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) +{ + uint32_t i; + int r = 0; + + for (i = 0; i < p->num_objects; i++) { + const unsigned char *sha1 = nth_packed_object_sha1(p, i); + + if (!sha1) + return error("unable to get sha1 of object %u in %s", + i, p->pack_name); + + r = cb(sha1, p, i, data); + if (r) + break; + } + return r; +} + +int for_each_packed_object(each_packed_object_fn cb, void *data) +{ + struct packed_git *p; + int r = 0; + + prepare_packed_git(); + for (p = packed_git; p; p = p->next) { + r = for_each_object_in_pack(p, cb, data); + if (r) + break; + } + return r; +} From d3038d22f91aad9620bd8e6fc43fc67c16219738 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:41:35 -0400 Subject: [PATCH 14/28] prune: keep objects reachable from recent objects Our current strategy with prune is that an object falls into one of three categories: 1. Reachable (from ref tips, reflogs, index, etc). 2. Not reachable, but recent (based on the --expire time). 3. Not reachable and not recent. We keep objects from (1) and (2), but prune objects in (3). The point of (2) is that these objects may be part of an in-progress operation that has not yet updated any refs. However, it is not always the case that objects for an in-progress operation will have a recent mtime. For example, the object database may have an old copy of a blob (from an abandoned operation, a branch that was deleted, etc). If we create a new tree that points to it, a simultaneous prune will leave our tree, but delete the blob. Referencing that tree with a commit will then work (we check that the tree is in the object database, but not that all of its referred objects are), as will mentioning the commit in a ref. But the resulting repo is corrupt; we are missing the blob reachable from a ref. One way to solve this is to be more thorough when referencing a sha1: make sure that not only do we have that sha1, but that we have objects it refers to, and so forth recursively. The problem is that this is very expensive. Creating a parent link would require traversing the entire object graph! Instead, this patch pushes the extra work onto prune, which runs less frequently (and has to look at the whole object graph anyway). It creates a new category of objects: objects which are not recent, but which are reachable from a recent object. We do not prune these objects, just like the reachable and recent ones. This lets us avoid the recursive check above, because if we have an object, even if it is unreachable, we should have its referent. We can make a simple inductive argument that with this patch, this property holds (that there are no objects with missing referents in the repository): 0. When we have no objects, we have nothing to refer or be referred to, so the property holds. 1. If we add objects to the repository, their direct referents must generally exist (e.g., if you create a tree, the blobs it references must exist; if you create a commit to point at the tree, the tree must exist). This is already the case before this patch. And it is not 100% foolproof (you can make bogus objects using `git hash-object`, for example), but it should be the case for normal usage. Therefore for any sequence of object additions, the property will continue to hold. 2. If we remove objects from the repository, then we will not remove a child object (like a blob) if an object that refers to it is being kept. That is the part implemented by this patch. Note, however, that our reachability check and the actual pruning are not atomic. So it _is_ still possible to violate the property (e.g., an object becomes referenced just as we are deleting it). This patch is shooting for eliminating problems where the mtimes of dependent objects differ by hours or days, and one is dropped without the other. It does nothing to help with short races. Naively, the simplest way to implement this would be to add all recent objects as tips to the reachability traversal. However, this does not perform well. In a recently-packed repository, all reachable objects will also be recent, and therefore we have to look at each object twice. This patch instead performs the reachability traversal, then follows up with a second traversal for recent objects, skipping any that have already been marked. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/prune.c | 2 +- builtin/reflog.c | 2 +- reachable.c | 112 +++++++++++++++++++++++++++++++++++++ reachable.h | 3 +- t/t6501-freshen-objects.sh | 88 +++++++++++++++++++++++++++++ 5 files changed, 204 insertions(+), 3 deletions(-) create mode 100755 t/t6501-freshen-objects.sh diff --git a/builtin/prune.c b/builtin/prune.c index 763f53e06..04d3b12ae 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -135,7 +135,7 @@ int cmd_prune(int argc, const char **argv, const char *prefix) if (show_progress) progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2); - mark_reachable_objects(&revs, 1, progress); + mark_reachable_objects(&revs, 1, expire, progress); stop_progress(&progress); for_each_loose_file_in_objdir(get_object_directory(), prune_object, prune_cruft, prune_subdir, NULL); diff --git a/builtin/reflog.c b/builtin/reflog.c index e8a8fb13b..80bddc259 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -649,7 +649,7 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix) init_revisions(&cb.revs, prefix); if (cb.verbose) printf("Marking reachable objects..."); - mark_reachable_objects(&cb.revs, 0, NULL); + mark_reachable_objects(&cb.revs, 0, 0, NULL); if (cb.verbose) putchar('\n'); } diff --git a/reachable.c b/reachable.c index d03f8294e..55589a02e 100644 --- a/reachable.c +++ b/reachable.c @@ -97,7 +97,109 @@ static void mark_commit(struct commit *c, void *data) mark_object(&c->object, NULL, NULL, data); } +struct recent_data { + struct rev_info *revs; + unsigned long timestamp; +}; + +static void add_recent_object(const unsigned char *sha1, + unsigned long mtime, + struct recent_data *data) +{ + struct object *obj; + enum object_type type; + + if (mtime <= data->timestamp) + return; + + /* + * We do not want to call parse_object here, because + * inflating blobs and trees could be very expensive. + * However, we do need to know the correct type for + * later processing, and the revision machinery expects + * commits and tags to have been parsed. + */ + type = sha1_object_info(sha1, NULL); + if (type < 0) + die("unable to get object info for %s", sha1_to_hex(sha1)); + + switch (type) { + case OBJ_TAG: + case OBJ_COMMIT: + obj = parse_object_or_die(sha1, NULL); + break; + case OBJ_TREE: + obj = (struct object *)lookup_tree(sha1); + break; + case OBJ_BLOB: + obj = (struct object *)lookup_blob(sha1); + break; + default: + die("unknown object type for %s: %s", + sha1_to_hex(sha1), typename(type)); + } + + if (!obj) + die("unable to lookup %s", sha1_to_hex(sha1)); + + add_pending_object(data->revs, obj, ""); +} + +static int add_recent_loose(const unsigned char *sha1, + const char *path, void *data) +{ + struct stat st; + struct object *obj = lookup_object(sha1); + + if (obj && obj->flags & SEEN) + return 0; + + if (stat(path, &st) < 0) { + /* + * It's OK if an object went away during our iteration; this + * could be due to a simultaneous repack. But anything else + * we should abort, since we might then fail to mark objects + * which should not be pruned. + */ + if (errno == ENOENT) + return 0; + return error("unable to stat %s: %s", + sha1_to_hex(sha1), strerror(errno)); + } + + add_recent_object(sha1, st.st_mtime, data); + return 0; +} + +static int add_recent_packed(const unsigned char *sha1, + struct packed_git *p, uint32_t pos, + void *data) +{ + struct object *obj = lookup_object(sha1); + + if (obj && obj->flags & SEEN) + return 0; + add_recent_object(sha1, p->mtime, data); + return 0; +} + +static int add_unseen_recent_objects_to_traversal(struct rev_info *revs, + unsigned long timestamp) +{ + struct recent_data data; + int r; + + data.revs = revs; + data.timestamp = timestamp; + + r = for_each_loose_object(add_recent_loose, &data); + if (r) + return r; + return for_each_packed_object(add_recent_packed, &data); +} + void mark_reachable_objects(struct rev_info *revs, int mark_reflog, + unsigned long mark_recent, struct progress *progress) { struct connectivity_progress cp; @@ -133,5 +235,15 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, if (prepare_revision_walk(revs)) die("revision walk setup failed"); traverse_commit_list(revs, mark_commit, mark_object, &cp); + + if (mark_recent) { + revs->ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(revs, mark_recent)) + die("unable to mark recent objects"); + if (prepare_revision_walk(revs)) + die("revision walk setup failed"); + traverse_commit_list(revs, mark_commit, mark_object, &cp); + } + display_progress(cp.progress, cp.count); } diff --git a/reachable.h b/reachable.h index 5d082adfe..141fe3087 100644 --- a/reachable.h +++ b/reachable.h @@ -2,6 +2,7 @@ #define REACHEABLE_H struct progress; -extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, struct progress *); +extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, + unsigned long mark_recent, struct progress *); #endif diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh new file mode 100755 index 000000000..de941c2cb --- /dev/null +++ b/t/t6501-freshen-objects.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# This test covers the handling of objects which might have old +# mtimes in the filesystem (because they were used previously) +# and are just now becoming referenced again. +# +# We're going to do two things that are a little bit "fake" to +# help make our simulation easier: +# +# 1. We'll turn off reflogs. You can still run into +# problems with reflogs on, but your objects +# don't get pruned until both the reflog expiration +# has passed on their references, _and_ they are out +# of prune's expiration period. Dropping reflogs +# means we only have to deal with one variable in our tests, +# but the results generalize. +# +# 2. We'll use a temporary index file to create our +# works-in-progress. Most workflows would mention +# referenced objects in the index, which prune takes +# into account. However, many operations don't. For +# example, a partial commit with "git commit foo" +# will use a temporary index. Or they may not need +# an index at all (e.g., creating a new commit +# to refer to an existing tree). + +test_description='check pruning of dependent objects' +. ./test-lib.sh + +# We care about reachability, so we do not want to use +# the normal test_commit, which creates extra tags. +add () { + echo "$1" >"$1" && + git add "$1" +} +commit () { + test_tick && + add "$1" && + git commit -m "$1" +} + +test_expect_success 'disable reflogs' ' + git config core.logallrefupdates false && + rm -rf .git/logs +' + +test_expect_success 'setup basic history' ' + commit base +' + +test_expect_success 'create and abandon some objects' ' + git checkout -b experiment && + commit abandon && + git checkout master && + git branch -D experiment +' + +test_expect_success 'simulate time passing' ' + find .git/objects -type f | + xargs test-chmtime -v -86400 +' + +test_expect_success 'start writing new commit with old blob' ' + tree=$( + GIT_INDEX_FILE=index.tmp && + export GIT_INDEX_FILE && + git read-tree HEAD && + add unrelated && + add abandon && + git write-tree + ) +' + +test_expect_success 'simultaneous gc' ' + git gc --prune=12.hours.ago +' + +test_expect_success 'finish writing out commit' ' + commit=$(echo foo | git commit-tree -p HEAD $tree) && + git update-ref HEAD $commit +' + +# "abandon" blob should have been rescued by reference from new tree +test_expect_success 'repository passes fsck' ' + git fsck +' + +test_done From d0d46abc167d18fdbdbf2ece8ca6df704517c62f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:41:53 -0400 Subject: [PATCH 15/28] pack-objects: refactor unpack-unreachable expiration check When we are loosening unreachable packed objects, we do not bother to process objects that would simply be pruned immediately anyway. The "would be pruned" check is a simple comparison, but is about to get more complicated. Let's pull it out into a separate function. Note that this is slightly less efficient than the original, which avoided even opening old packs, since no object in them could pass the current check, which cares only about the pack mtime. But the new rules will depend on the exact object, so we need to perform the check even for old packs. Note also that we fix a minor buglet when the pack mtime is exactly the same as the expiration time. The prune code considers that worth pruning, whereas our check here considered it worth keeping. This wasn't a big deal. Besides being unlikely to happen, the result was simply that the object was loosened and then pruned, missing the optimization. Still, we can easily fix it while we are here. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index d39193453..2fe2ab060 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2407,6 +2407,16 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) return 0; } +static int loosened_object_can_be_discarded(const unsigned char *sha1, + unsigned long mtime) +{ + if (!unpack_unreachable_expiration) + return 0; + if (mtime > unpack_unreachable_expiration) + return 0; + return 1; +} + static void loosen_unused_packed_objects(struct rev_info *revs) { struct packed_git *p; @@ -2417,17 +2427,14 @@ static void loosen_unused_packed_objects(struct rev_info *revs) if (!p->pack_local || p->pack_keep) continue; - if (unpack_unreachable_expiration && - p->mtime < unpack_unreachable_expiration) - continue; - if (open_pack_index(p)) die("cannot open pack index"); for (i = 0; i < p->num_objects; i++) { sha1 = nth_packed_object_sha1(p, i); if (!packlist_find(&to_pack, sha1, NULL) && - !has_sha1_pack_kept_or_nonlocal(sha1)) + !has_sha1_pack_kept_or_nonlocal(sha1) && + !loosened_object_can_be_discarded(sha1, p->mtime)) if (force_object_loose(sha1, p->mtime)) die("unable to force loose object"); } From abcb86553d3ec4afffa4e3963089dffe0559740e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:42:09 -0400 Subject: [PATCH 16/28] pack-objects: match prune logic for discarding objects A recent commit taught git-prune to keep non-recent objects that are reachable from recent ones. However, pack-objects, when loosening unreachable objects, tries to optimize out the write in the case that the object will be immediately pruned. It now gets this wrong, since its rule does not reflect the new prune code (and this can be seen by running t6501 with a strategically placed repack). Let's teach pack-objects similar logic. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 39 ++++++++++++++++ reachable.c | 4 +- reachable.h | 2 + t/t6501-freshen-objects.sh | 93 ++++++++++++++++++++++---------------- 4 files changed, 98 insertions(+), 40 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 2fe2ab060..4df949904 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -20,6 +20,8 @@ #include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" +#include "reachable.h" +#include "sha1-array.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"), @@ -2407,6 +2409,15 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) return 0; } +/* + * Store a list of sha1s that are should not be discarded + * because they are either written too recently, or are + * reachable from another object that was. + * + * This is filled by get_object_list. + */ +static struct sha1_array recent_objects; + static int loosened_object_can_be_discarded(const unsigned char *sha1, unsigned long mtime) { @@ -2414,6 +2425,8 @@ static int loosened_object_can_be_discarded(const unsigned char *sha1, return 0; if (mtime > unpack_unreachable_expiration) return 0; + if (sha1_array_lookup(&recent_objects, sha1) >= 0) + return 0; return 1; } @@ -2470,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs) return 0; } +static void record_recent_object(struct object *obj, + const struct name_path *path, + const char *last, + void *data) +{ + sha1_array_append(&recent_objects, obj->sha1); +} + +static void record_recent_commit(struct commit *commit, void *data) +{ + sha1_array_append(&recent_objects, commit->object.sha1); +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; @@ -2517,10 +2543,23 @@ static void get_object_list(int ac, const char **av) mark_edges_uninteresting(&revs, show_edge); traverse_commit_list(&revs, show_commit, show_object, NULL); + if (unpack_unreachable_expiration) { + revs.ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(&revs, + unpack_unreachable_expiration)) + die("unable to add recent objects"); + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + traverse_commit_list(&revs, record_recent_commit, + record_recent_object, NULL); + } + if (keep_unreachable) add_objects_in_unpacked_packs(&revs); if (unpack_unreachable) loosen_unused_packed_objects(&revs); + + sha1_array_clear(&recent_objects); } static int option_parse_index_version(const struct option *opt, diff --git a/reachable.c b/reachable.c index 55589a02e..0176a88b8 100644 --- a/reachable.c +++ b/reachable.c @@ -183,8 +183,8 @@ static int add_recent_packed(const unsigned char *sha1, return 0; } -static int add_unseen_recent_objects_to_traversal(struct rev_info *revs, - unsigned long timestamp) +int add_unseen_recent_objects_to_traversal(struct rev_info *revs, + unsigned long timestamp) { struct recent_data data; int r; diff --git a/reachable.h b/reachable.h index 141fe3087..d23efc36e 100644 --- a/reachable.h +++ b/reachable.h @@ -2,6 +2,8 @@ #define REACHEABLE_H struct progress; +extern int add_unseen_recent_objects_to_traversal(struct rev_info *revs, + unsigned long timestamp); extern void mark_reachable_objects(struct rev_info *revs, int mark_reflog, unsigned long mark_recent, struct progress *); diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh index de941c2cb..e25c47dd5 100755 --- a/t/t6501-freshen-objects.sh +++ b/t/t6501-freshen-objects.sh @@ -39,50 +39,67 @@ commit () { git commit -m "$1" } -test_expect_success 'disable reflogs' ' - git config core.logallrefupdates false && - rm -rf .git/logs -' +maybe_repack () { + if test -n "$repack"; then + git repack -ad + fi +} + +for repack in '' true; do + title=${repack:+repack} + title=${title:-loose} + + test_expect_success "make repo completely empty ($title)" ' + rm -rf .git && + git init + ' + + test_expect_success "disable reflogs ($title)" ' + git config core.logallrefupdates false && + rm -rf .git/logs + ' -test_expect_success 'setup basic history' ' - commit base -' + test_expect_success "setup basic history ($title)" ' + commit base + ' -test_expect_success 'create and abandon some objects' ' - git checkout -b experiment && - commit abandon && - git checkout master && - git branch -D experiment -' + test_expect_success "create and abandon some objects ($title)" ' + git checkout -b experiment && + commit abandon && + maybe_repack && + git checkout master && + git branch -D experiment + ' -test_expect_success 'simulate time passing' ' - find .git/objects -type f | - xargs test-chmtime -v -86400 -' + test_expect_success "simulate time passing ($title)" ' + find .git/objects -type f | + xargs test-chmtime -v -86400 + ' -test_expect_success 'start writing new commit with old blob' ' - tree=$( - GIT_INDEX_FILE=index.tmp && - export GIT_INDEX_FILE && - git read-tree HEAD && - add unrelated && - add abandon && - git write-tree - ) -' + test_expect_success "start writing new commit with old blob ($title)" ' + tree=$( + GIT_INDEX_FILE=index.tmp && + export GIT_INDEX_FILE && + git read-tree HEAD && + add unrelated && + add abandon && + git write-tree + ) + ' -test_expect_success 'simultaneous gc' ' - git gc --prune=12.hours.ago -' + test_expect_success "simultaneous gc ($title)" ' + git gc --prune=12.hours.ago + ' -test_expect_success 'finish writing out commit' ' - commit=$(echo foo | git commit-tree -p HEAD $tree) && - git update-ref HEAD $commit -' + test_expect_success "finish writing out commit ($title)" ' + commit=$(echo foo | git commit-tree -p HEAD $tree) && + git update-ref HEAD $commit + ' -# "abandon" blob should have been rescued by reference from new tree -test_expect_success 'repository passes fsck' ' - git fsck -' + # "abandon" blob should have been rescued by reference from new tree + test_expect_success "repository passes fsck ($title)" ' + git fsck + ' +done test_done From 33d4221c79c89844bed6b9558cc2bc497251ef70 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:42:22 -0400 Subject: [PATCH 17/28] write_sha1_file: freshen existing objects When we try to write a loose object file, we first check whether that object already exists. If so, we skip the write as an optimization. However, this can interfere with prune's strategy of using mtimes to mark files in progress. For example, if a branch contains a particular tree object and is deleted, that tree object may become unreachable, and have an old mtime. If a new operation then tries to write the same tree, this ends up as a noop; we notice we already have the object and do nothing. A prune running simultaneously with this operation will see the object as old, and may delete it. We can solve this by "freshening" objects that we avoid writing by updating their mtime. The algorithm for doing so is essentially the same as that of has_sha1_file. Therefore we provide a new (static) interface "check_and_freshen", which finds and optionally freshens the object. It's trivial to implement freshening and simple checking by tweaking a single parameter. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 51 ++++++++++++++++++++++++++++++++------ t/t6501-freshen-objects.sh | 27 ++++++++++++++++++++ 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 55c65b7ef..c63264198 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -442,27 +442,53 @@ void prepare_alt_odb(void) read_info_alternates(get_object_directory(), 0); } -static int has_loose_object_local(const unsigned char *sha1) +static int freshen_file(const char *fn) { - return !access(sha1_file_name(sha1), F_OK); + struct utimbuf t; + t.actime = t.modtime = time(NULL); + return !utime(fn, &t); } -int has_loose_object_nonlocal(const unsigned char *sha1) +static int check_and_freshen_file(const char *fn, int freshen) +{ + if (access(fn, F_OK)) + return 0; + if (freshen && freshen_file(fn)) + return 0; + return 1; +} + +static int check_and_freshen_local(const unsigned char *sha1, int freshen) +{ + return check_and_freshen_file(sha1_file_name(sha1), freshen); +} + +static int check_and_freshen_nonlocal(const unsigned char *sha1, int freshen) { struct alternate_object_database *alt; prepare_alt_odb(); for (alt = alt_odb_list; alt; alt = alt->next) { fill_sha1_path(alt->name, sha1); - if (!access(alt->base, F_OK)) + if (check_and_freshen_file(alt->base, freshen)) return 1; } return 0; } +static int check_and_freshen(const unsigned char *sha1, int freshen) +{ + return check_and_freshen_local(sha1, freshen) || + check_and_freshen_nonlocal(sha1, freshen); +} + +int has_loose_object_nonlocal(const unsigned char *sha1) +{ + return check_and_freshen_nonlocal(sha1, 0); +} + static int has_loose_object(const unsigned char *sha1) { - return has_loose_object_local(sha1) || - has_loose_object_nonlocal(sha1); + return check_and_freshen(sha1, 0); } static unsigned int pack_used_ctr; @@ -2965,6 +2991,17 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen, return move_temp_to_file(tmp_file, filename); } +static int freshen_loose_object(const unsigned char *sha1) +{ + return check_and_freshen(sha1, 1); +} + +static int freshen_packed_object(const unsigned char *sha1) +{ + struct pack_entry e; + return find_pack_entry(sha1, &e) && freshen_file(e.p->pack_name); +} + int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *returnsha1) { unsigned char sha1[20]; @@ -2977,7 +3014,7 @@ int write_sha1_file(const void *buf, unsigned long len, const char *type, unsign write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); if (returnsha1) hashcpy(returnsha1, sha1); - if (has_sha1_file(sha1)) + if (freshen_loose_object(sha1) || freshen_packed_object(sha1)) return 0; return write_loose_object(sha1, hdr, hdrlen, buf, len, 0); } diff --git a/t/t6501-freshen-objects.sh b/t/t6501-freshen-objects.sh index e25c47dd5..157f3f91d 100755 --- a/t/t6501-freshen-objects.sh +++ b/t/t6501-freshen-objects.sh @@ -100,6 +100,33 @@ for repack in '' true; do test_expect_success "repository passes fsck ($title)" ' git fsck ' + + test_expect_success "abandon objects again ($title)" ' + git reset --hard HEAD^ && + find .git/objects -type f | + xargs test-chmtime -v -86400 + ' + + test_expect_success "start writing new commit with same tree ($title)" ' + tree=$( + GIT_INDEX_FILE=index.tmp && + export GIT_INDEX_FILE && + git read-tree HEAD && + add abandon && + add unrelated && + git write-tree + ) + ' + + test_expect_success "simultaneous gc ($title)" ' + git gc --prune=12.hours.ago + ' + + # tree should have been refreshed by write-tree + test_expect_success "finish writing out commit ($title)" ' + commit=$(echo foo | git commit-tree -p HEAD $tree) && + git update-ref HEAD $commit + ' done test_done From 9e0c3c4fcdf3775a9e0256ee231efa4698297a0e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:42:57 -0400 Subject: [PATCH 18/28] make add_object_array_with_context interface more sane When you resolve a sha1, you can optionally keep any context found during the resolution, including the path and mode of a tree entry (e.g., when looking up "HEAD:subdir/file.c"). The add_object_array_with_context function lets you then attach that context to an entry in a list. Unfortunately, the interface for doing so is horrible. The object_context structure is large and most object_array users do not use it. Therefore we keep a pointer to the structure to avoid burdening other users too much. But that means when we do use it that we must allocate the struct ourselves. And the struct contains a fixed PATH_MAX-sized buffer, which makes this wholly unsuitable for any large arrays. We can observe that there is only a single user of the "with_context" variant: builtin/grep.c. And in that use case, the only element we care about is the path. We can therefore store only the path as a pointer (the context's mode field was redundant with the object_array_entry itself, and nobody actually cared about the surrounding tree). This still requires a strdup of the pathname, but at least we are only consuming the minimum amount of memory for each string. We can also handle the copying ourselves in add_object_array_*, and free it as appropriate in object_array_release_entry. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/grep.c | 8 ++++---- object.c | 23 +++++++++-------------- object.h | 4 ++-- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index c86a142f3..4063882f0 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -456,10 +456,10 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, } static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, - struct object *obj, const char *name, struct object_context *oc) + struct object *obj, const char *name, const char *path) { if (obj->type == OBJ_BLOB) - return grep_sha1(opt, obj->sha1, name, 0, oc ? oc->path : NULL); + return grep_sha1(opt, obj->sha1, name, 0, path); if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) { struct tree_desc tree; void *data; @@ -501,7 +501,7 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec, for (i = 0; i < nr; i++) { struct object *real_obj; real_obj = deref_tag(list->objects[i].item, NULL, 0); - if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].context)) { + if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].path)) { hit = 1; if (opt->status_only) break; @@ -821,7 +821,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) struct object *object = parse_object_or_die(sha1, arg); if (!seen_dashdash) verify_non_filename(prefix, arg); - add_object_array_with_context(object, arg, &list, xmemdupz(&oc, sizeof(struct object_context))); + add_object_array_with_path(object, arg, &list, oc.mode, oc.path); continue; } if (!strcmp(arg, "--")) { diff --git a/object.c b/object.c index 6aeb1bbbe..df86bdd5a 100644 --- a/object.c +++ b/object.c @@ -307,10 +307,9 @@ int object_list_contains(struct object_list *list, struct object *obj) */ static char object_array_slopbuf[1]; -static void add_object_array_with_mode_context(struct object *obj, const char *name, - struct object_array *array, - unsigned mode, - struct object_context *context) +void add_object_array_with_path(struct object *obj, const char *name, + struct object_array *array, + unsigned mode, const char *path) { unsigned nr = array->nr; unsigned alloc = array->alloc; @@ -333,7 +332,10 @@ static void add_object_array_with_mode_context(struct object *obj, const char *n else entry->name = xstrdup(name); entry->mode = mode; - entry->context = context; + if (path) + entry->path = xstrdup(path); + else + entry->path = NULL; array->nr = ++nr; } @@ -344,15 +346,7 @@ void add_object_array(struct object *obj, const char *name, struct object_array void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode) { - add_object_array_with_mode_context(obj, name, array, mode, NULL); -} - -void add_object_array_with_context(struct object *obj, const char *name, struct object_array *array, struct object_context *context) -{ - if (context) - add_object_array_with_mode_context(obj, name, array, context->mode, context); - else - add_object_array_with_mode_context(obj, name, array, S_IFINVALID, context); + add_object_array_with_path(obj, name, array, mode, NULL); } /* @@ -363,6 +357,7 @@ static void object_array_release_entry(struct object_array_entry *ent) { if (ent->name != object_array_slopbuf) free(ent->name); + free(ent->path); } void object_array_filter(struct object_array *array, diff --git a/object.h b/object.h index 2a755a237..e5178a516 100644 --- a/object.h +++ b/object.h @@ -18,8 +18,8 @@ struct object_array { * empty string. */ char *name; + char *path; unsigned mode; - struct object_context *context; } *objects; }; @@ -115,7 +115,7 @@ int object_list_contains(struct object_list *list, struct object *obj); /* Object array handling .. */ void add_object_array(struct object *obj, const char *name, struct object_array *array); void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode); -void add_object_array_with_context(struct object *obj, const char *name, struct object_array *array, struct object_context *context); +void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path); typedef int (*object_array_each_func_t)(struct object_array_entry *, void *); From 207394908e9465d0169608725aeaa5bb355086e0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:43:19 -0400 Subject: [PATCH 19/28] traverse_commit_list: support pending blobs/trees with paths When we call traverse_commit_list, we may have trees and blobs in the pending array. As we process these, we pass the "name" field from the pending entry as the path of the object within the tree (which then becomes the root path if we recurse into subtrees). When we set up the traversal in prepare_revision_walk, though, the "name" field of any pending trees and blobs is likely to be the ref at which we found the object. We would not want to make this part of the path (e.g., doing so would make "git rev-list --objects v2.6.11-tree" in linux.git show paths like "v2.6.11-tree/Makefile", which is nonsensical). Therefore prepare_revision_walk sets the name field of each pending tree and blobs to the empty string. However, this leaves no room for a caller who does know the correct path of a pending object to propagate that information to the revision walker. We can fix this by making two related changes: 1. Use the "path" field as the path instead of the "name" field in traverse_commit_list. If the path is not set, default to "" (which is what we always ended up with in the current code, because of prepare_revision_walk). 2. In prepare_revision_walk, make a complete copy of the entry. This makes the path field available to the walker (if there is one), solving our problem. Leaving the name field intact is now OK, as we do not use it as a path due to point (1) above (and we can use it to make more meaningful error messages if we want). We also make the original "mode" field available to the walker, though it does not actually use it. Note that we still re-add the pending objects and free the old ones (so we may strdup the path and name only to free the old ones). This could be made more efficient by simply copying the object_array entries that we are keeping. However, that would require more restructuring of the code, and is not done here. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- list-objects.c | 7 +++++-- revision.c | 34 +++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/list-objects.c b/list-objects.c index fad6808aa..2910becd6 100644 --- a/list-objects.c +++ b/list-objects.c @@ -208,6 +208,7 @@ void traverse_commit_list(struct rev_info *revs, struct object_array_entry *pending = revs->pending.objects + i; struct object *obj = pending->item; const char *name = pending->name; + const char *path = pending->path; if (obj->flags & (UNINTERESTING | SEEN)) continue; if (obj->type == OBJ_TAG) { @@ -215,14 +216,16 @@ void traverse_commit_list(struct rev_info *revs, show_object(obj, NULL, name, data); continue; } + if (!path) + path = ""; if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - NULL, &base, name, data); + NULL, &base, path, data); continue; } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - NULL, name, data); + NULL, path, data); continue; } die("unknown pending object %s (%s)", diff --git a/revision.c b/revision.c index b8e02e279..ebe3e93d1 100644 --- a/revision.c +++ b/revision.c @@ -198,9 +198,10 @@ void mark_parents_uninteresting(struct commit *commit) } } -static void add_pending_object_with_mode(struct rev_info *revs, +static void add_pending_object_with_path(struct rev_info *revs, struct object *obj, - const char *name, unsigned mode) + const char *name, unsigned mode, + const char *path) { if (!obj) return; @@ -220,7 +221,14 @@ static void add_pending_object_with_mode(struct rev_info *revs, if (st) return; } - add_object_array_with_mode(obj, name, &revs->pending, mode); + add_object_array_with_path(obj, name, &revs->pending, mode, path); +} + +static void add_pending_object_with_mode(struct rev_info *revs, + struct object *obj, + const char *name, unsigned mode) +{ + add_pending_object_with_path(revs, obj, name, mode, NULL); } void add_pending_object(struct rev_info *revs, @@ -265,8 +273,12 @@ void add_pending_sha1(struct rev_info *revs, const char *name, } static struct commit *handle_commit(struct rev_info *revs, - struct object *object, const char *name) + struct object_array_entry *entry) { + struct object *object = entry->item; + const char *name = entry->name; + const char *path = entry->path; + unsigned int mode = entry->mode; unsigned long flags = object->flags; /* @@ -285,6 +297,14 @@ static struct commit *handle_commit(struct rev_info *revs, die("bad object %s", sha1_to_hex(tag->tagged->sha1)); } object->flags |= flags; + /* + * We'll handle the tagged object by looping or dropping + * through to the non-tag handlers below. Do not + * propagate data from the tag's pending entry. + */ + name = ""; + path = NULL; + mode = 0; } /* @@ -316,7 +336,7 @@ static struct commit *handle_commit(struct rev_info *revs, mark_tree_contents_uninteresting(tree); return NULL; } - add_pending_object(revs, object, ""); + add_pending_object_with_path(revs, object, name, mode, path); return NULL; } @@ -328,7 +348,7 @@ static struct commit *handle_commit(struct rev_info *revs, return NULL; if (flags & UNINTERESTING) return NULL; - add_pending_object(revs, object, ""); + add_pending_object_with_path(revs, object, name, mode, path); return NULL; } die("%s is unknown object", name); @@ -2666,7 +2686,7 @@ int prepare_revision_walk(struct rev_info *revs) revs->pending.objects = NULL; for (i = 0; i < old_pending.nr; i++) { struct object_array_entry *e = old_pending.objects + i; - struct commit *commit = handle_commit(revs, e->item, e->name); + struct commit *commit = handle_commit(revs, e); if (commit) { if (!(commit->object.flags & SEEN)) { commit->object.flags |= SEEN; From 458a7e508c57afb6bbd4177fead21c63a0a20131 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:03:41 -0400 Subject: [PATCH 20/28] t5516: test pushing a tag of an otherwise unreferenced blob It's not unreasonable to have a tag that points to a blob that is not part of the normal history. We do this in git.git to distribute gpg keys. However, we never explicitly checked in our test suite that this actually works (i.e., that pack-objects actually sends the blob because of the tag mentioning it). It does in fact work fine, but a recent patch under discussion broke this, and the test suite didn't notice. Let's make the test suite more complete. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5516-fetch-push.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/t/t5516-fetch-push.sh b/t/t5516-fetch-push.sh index 67e0ab346..7c8a769a9 100755 --- a/t/t5516-fetch-push.sh +++ b/t/t5516-fetch-push.sh @@ -1277,4 +1277,17 @@ EOF git push --no-thin --receive-pack="$rcvpck" no-thin/.git refs/heads/master:refs/heads/foo ' +test_expect_success 'pushing a tag pushes the tagged object' ' + rm -rf dst.git && + blob=$(echo unreferenced | git hash-object -w --stdin) && + git tag -m foo tag-of-blob $blob && + git init --bare dst.git && + git push dst.git tag-of-blob && + # the receiving index-pack should have noticed + # any problems, but we double check + echo unreferenced >expect && + git --git-dir=dst.git cat-file blob tag-of-blob >actual && + test_cmp expect actual +' + test_done From 41d018d146f03f7b9320c7492d7cbde5df9f68a1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 15 Oct 2014 18:43:28 -0400 Subject: [PATCH 21/28] rev-list: document --reflog option This is mostly used internally, but it does not hurt to explain it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 5d311b8d4..4cf94c680 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -168,6 +168,10 @@ respectively, and they must begin with `refs/` when applied to `--glob` or `--all`. If a trailing '/{asterisk}' is intended, it must be given explicitly. +--reflog:: + Pretend as if all objects mentioned by reflogs are listed on the + command line as ``. + --ignore-missing:: Upon seeing an invalid object name in the input, pretend as if the bad input was not given. From 4fe10219bca6b010e2103116c653928992d5141b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:23 -0400 Subject: [PATCH 22/28] rev-list: add --indexed-objects option There is currently no easy way to ask the revision traversal machinery to include objects reachable from the index (e.g., blobs and trees that have not yet been committed). This patch adds an option to do so. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/rev-list-options.txt | 5 +++ revision.c | 51 ++++++++++++++++++++++++++++++ revision.h | 1 + t/t6000-rev-list-misc.sh | 23 ++++++++++++++ 4 files changed, 80 insertions(+) diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 4cf94c680..3301fdebf 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -172,6 +172,11 @@ explicitly. Pretend as if all objects mentioned by reflogs are listed on the command line as ``. +--indexed-objects:: + Pretend as if all trees and blobs used by the index are listed + on the command line. Note that you probably want to use + `--objects`, too. + --ignore-missing:: Upon seeing an invalid object name in the input, pretend as if the bad input was not given. diff --git a/revision.c b/revision.c index ebe3e93d1..a2337f8ff 100644 --- a/revision.c +++ b/revision.c @@ -17,6 +17,7 @@ #include "mailmap.h" #include "commit-slab.h" #include "dir.h" +#include "cache-tree.h" volatile show_early_output_fn_t show_early_output; @@ -1303,6 +1304,53 @@ void add_reflogs_to_pending(struct rev_info *revs, unsigned flags) for_each_reflog(handle_one_reflog, &cb); } +static void add_cache_tree(struct cache_tree *it, struct rev_info *revs, + struct strbuf *path) +{ + size_t baselen = path->len; + int i; + + if (it->entry_count >= 0) { + struct tree *tree = lookup_tree(it->sha1); + add_pending_object_with_path(revs, &tree->object, "", + 040000, path->buf); + } + + for (i = 0; i < it->subtree_nr; i++) { + struct cache_tree_sub *sub = it->down[i]; + strbuf_addf(path, "%s%s", baselen ? "/" : "", sub->name); + add_cache_tree(sub->cache_tree, revs, path); + strbuf_setlen(path, baselen); + } + +} + +void add_index_objects_to_pending(struct rev_info *revs, unsigned flags) +{ + int i; + + read_cache(); + for (i = 0; i < active_nr; i++) { + struct cache_entry *ce = active_cache[i]; + struct blob *blob; + + if (S_ISGITLINK(ce->ce_mode)) + continue; + + blob = lookup_blob(ce->sha1); + if (!blob) + die("unable to add index blob to traversal"); + add_pending_object_with_path(revs, &blob->object, "", + ce->ce_mode, ce->name); + } + + if (active_cache_tree) { + struct strbuf path = STRBUF_INIT; + add_cache_tree(active_cache_tree, revs, &path); + strbuf_release(&path); + } +} + static int add_parents_only(struct rev_info *revs, const char *arg_, int flags) { unsigned char sha1[20]; @@ -1653,6 +1701,7 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg !strcmp(arg, "--reflog") || !strcmp(arg, "--not") || !strcmp(arg, "--no-walk") || !strcmp(arg, "--do-walk") || !strcmp(arg, "--bisect") || starts_with(arg, "--glob=") || + !strcmp(arg, "--indexed-objects") || starts_with(arg, "--exclude=") || starts_with(arg, "--branches=") || starts_with(arg, "--tags=") || starts_with(arg, "--remotes=") || starts_with(arg, "--no-walk=")) @@ -2082,6 +2131,8 @@ static int handle_revision_pseudo_opt(const char *submodule, clear_ref_exclusion(&revs->ref_excludes); } else if (!strcmp(arg, "--reflog")) { add_reflogs_to_pending(revs, *flags); + } else if (!strcmp(arg, "--indexed-objects")) { + add_index_objects_to_pending(revs, *flags); } else if (!strcmp(arg, "--not")) { *flags ^= UNINTERESTING | BOTTOM; } else if (!strcmp(arg, "--no-walk")) { diff --git a/revision.h b/revision.h index e64404417..e6dcd5dab 100644 --- a/revision.h +++ b/revision.h @@ -277,6 +277,7 @@ extern void add_pending_sha1(struct rev_info *revs, extern void add_head_to_pending(struct rev_info *); extern void add_reflogs_to_pending(struct rev_info *, unsigned int flags); +extern void add_index_objects_to_pending(struct rev_info *, unsigned int flags); enum commit_action { commit_ignore, diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh index 3794e4cea..260208630 100755 --- a/t/t6000-rev-list-misc.sh +++ b/t/t6000-rev-list-misc.sh @@ -73,4 +73,27 @@ test_expect_success 'symleft flag bit is propagated down from tag' ' test_cmp expect actual ' +test_expect_success 'rev-list can show index objects' ' + # Of the blobs and trees in the index, note: + # + # - we do not show two/three, because it is the + # same blob as "one", and we show objects only once + # + # - we do show the tree "two", because it has a valid cache tree + # from the last commit + # + # - we do not show the root tree; since we updated the index, it + # does not have a valid cache tree + # + cat >expect <<-\EOF + 8e4020bb5a8d8c873b25de15933e75cc0fc275df one + d9d3a7417b9605cfd88ee6306b28dadc29e6ab08 only-in-index + 9200b628cf9dc883a85a7abc8d6e6730baee589c two + EOF + echo only-in-index >only-in-index && + git add only-in-index && + git rev-list --objects --indexed-objects >actual && + test_cmp expect actual +' + test_done From 1be111d88f526130dcf3b7c7ad0f5df17c6d2bae Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:30 -0400 Subject: [PATCH 23/28] reachable: use revision machinery's --indexed-objects code This does the same thing as our custom code, so let's not repeat ourselves. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- reachable.c | 52 +--------------------------------------------------- 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/reachable.c b/reachable.c index 0176a88b8..a647267ae 100644 --- a/reachable.c +++ b/reachable.c @@ -32,56 +32,6 @@ static int add_one_ref(const char *path, const unsigned char *sha1, int flag, vo return 0; } -static void add_one_tree(const unsigned char *sha1, struct rev_info *revs) -{ - struct tree *tree = lookup_tree(sha1); - if (tree) - add_pending_object(revs, &tree->object, ""); -} - -static void add_cache_tree(struct cache_tree *it, struct rev_info *revs) -{ - int i; - - if (it->entry_count >= 0) - add_one_tree(it->sha1, revs); - for (i = 0; i < it->subtree_nr; i++) - add_cache_tree(it->down[i]->cache_tree, revs); -} - -static void add_cache_refs(struct rev_info *revs) -{ - int i; - - read_cache(); - for (i = 0; i < active_nr; i++) { - struct blob *blob; - - /* - * The index can contain blobs and GITLINKs, GITLINKs are hashes - * that don't actually point to objects in the repository, it's - * almost guaranteed that they are NOT blobs, so we don't call - * lookup_blob() on them, to avoid populating the hash table - * with invalid information - */ - if (S_ISGITLINK(active_cache[i]->ce_mode)) - continue; - - blob = lookup_blob(active_cache[i]->sha1); - if (blob) - blob->object.flags |= SEEN; - - /* - * We could add the blobs to the pending list, but quite - * frankly, we don't care. Once we've looked them up, and - * added them as objects, we've really done everything - * there is to do for a blob - */ - } - if (active_cache_tree) - add_cache_tree(active_cache_tree, revs); -} - /* * The traversal will have already marked us as SEEN, so we * only need to handle any progress reporting here. @@ -213,7 +163,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, revs->tree_objects = 1; /* Add all refs from the index file */ - add_cache_refs(revs); + add_index_objects_to_pending(revs, 0); /* Add all external refs */ for_each_ref(add_one_ref, revs); From edfbb2aa538148b38ee0ba6d62c95b7edda5d817 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:35 -0400 Subject: [PATCH 24/28] pack-objects: use argv_array This saves us from having to bump the rp_av count when we add new traversal options. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 4df949904..b26276b42 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -22,6 +22,7 @@ #include "pack-bitmap.h" #include "reachable.h" #include "sha1-array.h" +#include "argv-array.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"), @@ -2614,8 +2615,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) int use_internal_rev_list = 0; int thin = 0; int all_progress_implied = 0; - const char *rp_av[6]; - int rp_ac = 0; + struct argv_array rp = ARGV_ARRAY_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; struct option pack_objects_options[] = { OPT_SET_INT('q', "quiet", &progress, @@ -2705,24 +2705,24 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (pack_to_stdout != !base_name || argc) usage_with_options(pack_usage, pack_objects_options); - rp_av[rp_ac++] = "pack-objects"; + argv_array_push(&rp, "pack-objects"); if (thin) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--objects-edge"; + argv_array_push(&rp, "--objects-edge"); } else - rp_av[rp_ac++] = "--objects"; + argv_array_push(&rp, "--objects"); if (rev_list_all) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--all"; + argv_array_push(&rp, "--all"); } if (rev_list_reflog) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--reflog"; + argv_array_push(&rp, "--reflog"); } if (rev_list_unpacked) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--unpacked"; + argv_array_push(&rp, "--unpacked"); } if (!reuse_object) @@ -2766,8 +2766,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!use_internal_rev_list) read_object_list_from_stdin(); else { - rp_av[rp_ac] = NULL; - get_object_list(rp_ac, rp_av); + get_object_list(rp.argc, rp.argv); + argv_array_clear(&rp); } cleanup_preferred_base(); if (include_tag && nr_result) From c90f9e13abae630551ada3e895633bdc2cf4e080 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:49 -0400 Subject: [PATCH 25/28] repack: pack objects mentioned by the index When we pack all objects, we use only the objects reachable from references and reflogs. This misses any objects which are reachable from the index, but not yet referenced. By itself this isn't a big deal; the objects can remain loose until they are actually used in a commit. However, it does create a problem when we drop packed but unreachable objects. We try to optimize out the writing of objects that we will immediately prune, which means we must follow the same rules as prune in determining what is reachable. And prune uses the index for this purpose. This is rather uncommon in practice, as objects in the index would not usually have been packed in the first place. But it could happen in a sequence like: 1. You make a commit on a branch that references blob X. 2. You repack, moving X into the pack. 3. You delete the branch (and its reflog), so that X is unreferenced. 4. You "git add" blob X so that it is now referenced only by the index. 5. You repack again with git-gc. The pack-objects we invoke will see that X is neither referenced nor recent and not bother loosening it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 8 ++++++++ builtin/repack.c | 1 + t/t7701-repack-unpack-unreachable.sh | 13 +++++++++++++ 3 files changed, 22 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index b26276b42..0cf95c990 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2617,6 +2617,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) int all_progress_implied = 0; struct argv_array rp = ARGV_ARRAY_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; + int rev_list_index = 0; struct option pack_objects_options[] = { OPT_SET_INT('q', "quiet", &progress, N_("do not show progress meter"), 0), @@ -2663,6 +2664,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL, N_("include objects referred by reflog entries"), PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, + { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL, + N_("include objects referred to by the index"), + PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, OPT_BOOL(0, "stdout", &pack_to_stdout, N_("output pack to stdout")), OPT_BOOL(0, "include-tag", &include_tag, @@ -2720,6 +2724,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) use_internal_rev_list = 1; argv_array_push(&rp, "--reflog"); } + if (rev_list_index) { + use_internal_rev_list = 1; + argv_array_push(&rp, "--indexed-objects"); + } if (rev_list_unpacked) { use_internal_rev_list = 1; argv_array_push(&rp, "--unpacked"); diff --git a/builtin/repack.c b/builtin/repack.c index 2aae05d36..28456206c 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,6 +209,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) argv_array_push(&cmd_args, "--non-empty"); argv_array_push(&cmd_args, "--all"); argv_array_push(&cmd_args, "--reflog"); + argv_array_push(&cmd_args, "--indexed-objects"); if (window) argv_array_pushf(&cmd_args, "--window=%s", window); if (window_memory) diff --git a/t/t7701-repack-unpack-unreachable.sh b/t/t7701-repack-unpack-unreachable.sh index b8d4cdea8..aad8a9c64 100755 --- a/t/t7701-repack-unpack-unreachable.sh +++ b/t/t7701-repack-unpack-unreachable.sh @@ -109,4 +109,17 @@ test_expect_success 'do not bother loosening old objects' ' test_must_fail git cat-file -p $obj2 ' +test_expect_success 'keep packed objects found only in index' ' + echo my-unique-content >file && + git add file && + git commit -m "make it reachable" && + git gc && + git reset HEAD^ && + git reflog expire --expire=now --all && + git add file && + test-chmtime =-86400 .git/objects/pack/* && + git gc --prune=1.hour.ago && + git cat-file blob :file +' + test_done From b1e757f36377df1f2a6c165ebf171b09a8ad957b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 16 Oct 2014 20:44:54 -0400 Subject: [PATCH 26/28] pack-objects: double-check options before discarding objects When we are given an expiration time like --unpack-unreachable=2.weeks.ago, we avoid writing out old, unreachable loose objects entirely, under the assumption that running "prune" would simply delete them immediately anyway. However, this is only valid if we computed the same set of reachable objects as prune would. In practice, this is the case, because only git-repack uses the --unpack-unreachable option with an expiration, and it always feeds as many objects into the pack as possible. But we can double-check at runtime just to be sure. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 0cf95c990..64123d422 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -2757,6 +2757,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (keep_unreachable && unpack_unreachable) die("--keep-unreachable and --unpack-unreachable are incompatible."); + if (!rev_list_all || !rev_list_reflog || !rev_list_index) + unpack_unreachable_expiration = 0; if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow()) use_bitmap_index = 0; From d7702be1e1c003a9a26ab60bb302c32b398e033d Mon Sep 17 00:00:00 2001 From: Ramsay Jones Date: Sat, 18 Oct 2014 22:36:12 +0100 Subject: [PATCH 27/28] revision: remove definition of unused 'add_object' function Signed-off-by: Ramsay Jones Signed-off-by: Junio C Hamano --- revision.c | 10 ---------- revision.h | 5 ----- 2 files changed, 15 deletions(-) diff --git a/revision.c b/revision.c index a2337f8ff..75dda928e 100644 --- a/revision.c +++ b/revision.c @@ -87,16 +87,6 @@ void show_object_with_name(FILE *out, struct object *obj, fputc('\n', out); } -void add_object(struct object *obj, - struct object_array *p, - struct name_path *path, - const char *name) -{ - char *pn = path_name(path, name); - add_object_array(obj, pn, p); - free(pn); -} - static void mark_blob_uninteresting(struct blob *blob) { if (!blob) diff --git a/revision.h b/revision.h index e6dcd5dab..9cb5adc4e 100644 --- a/revision.h +++ b/revision.h @@ -264,11 +264,6 @@ char *path_name(const struct name_path *path, const char *name); extern void show_object_with_name(FILE *, struct object *, const struct name_path *, const char *); -extern void add_object(struct object *obj, - struct object_array *p, - struct name_path *path, - const char *name); - extern void add_pending_object(struct rev_info *revs, struct object *obj, const char *name); extern void add_pending_sha1(struct rev_info *revs, From 189a1222493f73977291f57d0f2030e982aff282 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sat, 18 Oct 2014 22:03:19 -0400 Subject: [PATCH 28/28] drop add_object_array_with_mode This is a thin compatibility wrapper around add_pending_object_with_path. But the only caller is add_object_array, which is itself just a thin compatibility wrapper. There are no external callers, so we can just remove this middle wrapper. Noticed-by: Ramsay Jones Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object.c | 7 +------ object.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/object.c b/object.c index df86bdd5a..23d6c9671 100644 --- a/object.c +++ b/object.c @@ -341,12 +341,7 @@ void add_object_array_with_path(struct object *obj, const char *name, void add_object_array(struct object *obj, const char *name, struct object_array *array) { - add_object_array_with_mode(obj, name, array, S_IFINVALID); -} - -void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode) -{ - add_object_array_with_path(obj, name, array, mode, NULL); + add_object_array_with_path(obj, name, array, S_IFINVALID, NULL); } /* diff --git a/object.h b/object.h index e5178a516..6416247de 100644 --- a/object.h +++ b/object.h @@ -114,7 +114,6 @@ int object_list_contains(struct object_list *list, struct object *obj); /* Object array handling .. */ void add_object_array(struct object *obj, const char *name, struct object_array *array); -void add_object_array_with_mode(struct object *obj, const char *name, struct object_array *array, unsigned mode); void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path); typedef int (*object_array_each_func_t)(struct object_array_entry *, void *);