Skip to content

Commit

Permalink
implement some resilience against pack corruptions
Browse files Browse the repository at this point in the history
We should be able to fall back to loose objects or alternative packs when
a pack becomes corrupted.  This is especially true when an object exists
in one pack only as a delta but its base object is corrupted.  Currently
there is no way to retrieve the former object even if the later is
available in another pack or loose.

This patch allows for a delta to be resolved (with a performance cost)
using a base object from a source other than the pack where that delta
is located.  Same thing for non-delta objects: rather than failing
outright, a search is made in other packs or used loose when the
currently active pack has it but corrupted.

Of course git will become extremely noisy with error messages when that
happens.  However, if the operation succeeds nevertheless, a simple
'git repack -a -f -d' will "fix" the corrupted repository given that all
corrupted objects have a good duplicate somewhere in the object store,
possibly manually copied from another source.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
Nicolas Pitre authored and Junio C Hamano committed Jun 24, 2008
1 parent 1f5c74f commit 8eca0b4
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 16 deletions.
2 changes: 2 additions & 0 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,8 @@ extern struct packed_git {
const void *index_data;
size_t index_size;
uint32_t num_objects;
uint32_t num_bad_objects;
unsigned char *bad_object_sha1;
int index_version;
time_t mtime;
int pack_fd;
Expand Down
92 changes: 76 additions & 16 deletions sha1_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,8 @@ struct packed_git *add_packed_git(const char *path, int path_len, int local)
p->index_data = NULL;
p->index_size = 0;
p->num_objects = 0;
p->num_bad_objects = 0;
p->bad_object_sha1 = NULL;
p->pack_size = st.st_size;
p->next = NULL;
p->windows = NULL;
Expand Down Expand Up @@ -982,6 +984,18 @@ void reprepare_packed_git(void)
prepare_packed_git();
}

static void mark_bad_packed_object(struct packed_git *p,
const unsigned char *sha1)
{
unsigned i;
for (i = 0; i < p->num_bad_objects; i++)
if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
return;
p->bad_object_sha1 = xrealloc(p->bad_object_sha1, 20 * (p->num_bad_objects + 1));
hashcpy(p->bad_object_sha1 + 20 * p->num_bad_objects, sha1);
p->num_bad_objects++;
}

int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
{
unsigned char real_sha1[20];
Expand Down Expand Up @@ -1300,20 +1314,17 @@ static off_t get_delta_base(struct packed_git *p,
while (c & 128) {
base_offset += 1;
if (!base_offset || MSB(base_offset, 7))
die("offset value overflow for delta base object");
return 0; /* overflow */
c = base_info[used++];
base_offset = (base_offset << 7) + (c & 127);
}
base_offset = delta_obj_offset - base_offset;
if (base_offset >= delta_obj_offset)
die("delta base offset out of bound");
return 0; /* out of bound */
*curpos += used;
} else if (type == OBJ_REF_DELTA) {
/* The base entry _must_ be in the same pack */
base_offset = find_pack_entry_one(base_info, p);
if (!base_offset)
die("failed to find delta-pack base object %s",
sha1_to_hex(base_info));
*curpos += 20;
} else
die("I am totally screwed");
Expand Down Expand Up @@ -1406,6 +1417,9 @@ const char *packed_object_info_detail(struct packed_git *p,
return typename(type);
case OBJ_OFS_DELTA:
obj_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
if (!obj_offset)
die("pack %s contains bad delta base reference of type %s",
p->pack_name, typename(type));
if (*delta_chain_length == 0) {
revidx = find_pack_revindex(p, obj_offset);
hashcpy(base_sha1, nth_packed_object_sha1(p, revidx->nr));
Expand Down Expand Up @@ -1600,17 +1614,41 @@ static void *unpack_delta_entry(struct packed_git *p,
off_t base_offset;

base_offset = get_delta_base(p, w_curs, &curpos, *type, obj_offset);
if (!base_offset) {
error("failed to validate delta base reference "
"at offset %"PRIuMAX" from %s",
(uintmax_t)curpos, p->pack_name);
return NULL;
}
base = cache_or_unpack_entry(p, base_offset, &base_size, type, 0);
if (!base)
die("failed to read delta base object"
" at %"PRIuMAX" from %s",
(uintmax_t)base_offset, p->pack_name);
if (!base) {
/*
* We're probably in deep shit, but let's try to fetch
* the required base anyway from another pack or loose.
* This is costly but should happen only in the presence
* of a corrupted pack, and is better than failing outright.
*/
struct revindex_entry *revidx = find_pack_revindex(p, base_offset);
const unsigned char *base_sha1 =
nth_packed_object_sha1(p, revidx->nr);
error("failed to read delta base object %s"
" at offset %"PRIuMAX" from %s",
sha1_to_hex(base_sha1), (uintmax_t)base_offset,
p->pack_name);
mark_bad_packed_object(p, base_sha1);
base = read_sha1_file(base_sha1, type, &base_size);
if (!base)
return NULL;
}

delta_data = unpack_compressed_entry(p, w_curs, curpos, delta_size);
if (!delta_data)
die("failed to unpack compressed delta"
" at %"PRIuMAX" from %s",
(uintmax_t)curpos, p->pack_name);
if (!delta_data) {
error("failed to unpack compressed delta "
"at offset %"PRIuMAX" from %s",
(uintmax_t)curpos, p->pack_name);
free(base);
return NULL;
}
result = patch_delta(base, base_size,
delta_data, delta_size,
sizep);
Expand Down Expand Up @@ -1642,7 +1680,9 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
data = unpack_compressed_entry(p, &w_curs, curpos, *sizep);
break;
default:
die("unknown object type %i in %s", *type, p->pack_name);
data = NULL;
error("unknown object type %i at offset %"PRIuMAX" in %s",
*type, (uintmax_t)obj_offset, p->pack_name);
}
unuse_pack(&w_curs);
return data;
Expand Down Expand Up @@ -1788,6 +1828,13 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons
goto next;
}

if (p->num_bad_objects) {
unsigned i;
for (i = 0; i < p->num_bad_objects; i++)
if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i))
goto next;
}

offset = find_pack_entry_one(sha1, p);
if (offset) {
/*
Expand Down Expand Up @@ -1872,11 +1919,24 @@ static void *read_packed_sha1(const unsigned char *sha1,
enum object_type *type, unsigned long *size)
{
struct pack_entry e;
void *data;

if (!find_pack_entry(sha1, &e, NULL))
return NULL;
else
return cache_or_unpack_entry(e.p, e.offset, size, type, 1);
data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
if (!data) {
/*
* We're probably in deep shit, but let's try to fetch
* the required object anyway from another pack or loose.
* This should happen only in the presence of a corrupted
* pack, and is better than failing outright.
*/
error("failed to read object %s at offset %"PRIuMAX" from %s",
sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
mark_bad_packed_object(e.p, sha1);
data = read_sha1_file(sha1, type, size);
}
return data;
}

/*
Expand Down

0 comments on commit 8eca0b4

Please sign in to comment.