From a1283866bab1cd12da57b3e427664180f5dee333 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 25 Feb 2016 09:21:12 -0500 Subject: [PATCH 1/4] t5313: test bounds-checks of corrupted/malicious pack/idx files Our on-disk .pack and .idx files may reference other data by offset. We should make sure that we are not fooled by corrupt data into accessing memory outside of our mmap'd boundaries. This patch adds a series of tests for offsets found in .pack and .idx files. For the most part we get this right, but there are two tests of .idx files marked as failures: we do not bounds-check offsets in the v2 index's extended offset table, nor do we handle .idx offsets that overflow a signed off_t. With these tests, we should have good coverage of all offsets found in these files. Note that this doesn't cover .bitmap files, which may have similar bugs. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5313-pack-bounds-checks.sh | 179 ++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) create mode 100755 t/t5313-pack-bounds-checks.sh diff --git a/t/t5313-pack-bounds-checks.sh b/t/t5313-pack-bounds-checks.sh new file mode 100755 index 000000000..efc519734 --- /dev/null +++ b/t/t5313-pack-bounds-checks.sh @@ -0,0 +1,179 @@ +#!/bin/sh + +test_description='bounds-checking of access to mmapped on-disk file formats' +. ./test-lib.sh + +clear_base () { + test_when_finished 'restore_base' && + rm -f $base +} + +restore_base () { + cp base-backup/* .git/objects/pack/ +} + +do_pack () { + pack_objects=$1; shift + sha1=$( + for i in $pack_objects + do + echo $i + done | git pack-objects "$@" .git/objects/pack/pack + ) && + pack=.git/objects/pack/pack-$sha1.pack && + idx=.git/objects/pack/pack-$sha1.idx && + chmod +w $pack $idx && + test_when_finished 'rm -f "$pack" "$idx"' +} + +munge () { + printf "$3" | dd of="$1" bs=1 conv=notrunc seek=$2 +} + +# Offset in a v2 .idx to its initial and extended offset tables. For an index +# with "nr" objects, this is: +# +# magic(4) + version(4) + fan-out(4*256) + sha1s(20*nr) + crc(4*nr), +# +# for the initial, and another ofs(4*nr) past that for the extended. +# +ofs_table () { + echo $((4 + 4 + 4*256 + 20*$1 + 4*$1)) +} +extended_table () { + echo $(($(ofs_table "$1") + 4*$1)) +} + +test_expect_success 'set up base packfile and variables' ' + # the hash of this content starts with ff, which + # makes some later computations much simpler + echo 74 >file && + git add file && + git commit -m base && + git repack -ad && + base=$(echo .git/objects/pack/*) && + chmod +w $base && + mkdir base-backup && + cp $base base-backup/ && + object=$(git rev-parse HEAD:file) +' + +test_expect_success 'pack/index object count mismatch' ' + do_pack $object && + munge $pack 8 "\377\0\0\0" && + clear_base && + + # We enumerate the objects from the completely-fine + # .idx, but notice later that the .pack is bogus + # and fail to show any data. + echo "$object missing" >expect && + git cat-file --batch-all-objects --batch-check >actual && + test_cmp expect actual && + + # ...and here fail to load the object (without segfaulting), + # but fallback to a good copy if available. + test_must_fail git cat-file blob $object && + restore_base && + git cat-file blob $object >actual && + test_cmp file actual && + + # ...and make sure that index-pack --verify, which has its + # own reading routines, does not segfault. + test_must_fail git index-pack --verify $pack +' + +test_expect_success 'matched bogus object count' ' + do_pack $object && + munge $pack 8 "\377\0\0\0" && + munge $idx $((255 * 4)) "\377\0\0\0" && + clear_base && + + # Unlike above, we should notice early that the .idx is totally + # bogus, and not even enumerate its contents. + >expect && + git cat-file --batch-all-objects --batch-check >actual && + test_cmp expect actual && + + # But as before, we can do the same object-access checks. + test_must_fail git cat-file blob $object && + restore_base && + git cat-file blob $object >actual && + test_cmp file actual && + + test_must_fail git index-pack --verify $pack +' + +# Note that we cannot check the fallback case for these +# further .idx tests, as we notice the problem in functions +# whose interface doesn't allow an error return (like use_pack()), +# and thus we just die(). +# +# There's also no point in doing enumeration tests, as +# we are munging offsets here, which are about looking up +# specific objects. + +test_expect_success 'bogus object offset (v1)' ' + do_pack $object --index-version=1 && + munge $idx $((4 * 256)) "\377\0\0\0" && + clear_base && + test_must_fail git cat-file blob $object && + test_must_fail git index-pack --verify $pack +' + +test_expect_success 'bogus object offset (v2, no msb)' ' + do_pack $object --index-version=2 && + munge $idx $(ofs_table 1) "\0\377\0\0" && + clear_base && + test_must_fail git cat-file blob $object && + test_must_fail git index-pack --verify $pack +' + +test_expect_failure 'bogus offset into v2 extended table' ' + do_pack $object --index-version=2 && + munge $idx $(ofs_table 1) "\377\0\0\0" && + clear_base && + test_must_fail git cat-file blob $object && + test_must_fail git index-pack --verify $pack +' + +test_expect_failure 'bogus offset inside v2 extended table' ' + # We need two objects here, so we can plausibly require + # an extended table (if the first object were larger than 2^31). + do_pack "$object $(git rev-parse HEAD)" --index-version=2 && + + # We have to make extra room for the table, so we cannot + # just munge in place as usual. + { + dd if=$idx bs=1 count=$(($(ofs_table 2) + 4)) && + printf "\200\0\0\0" && + printf "\377\0\0\0\0\0\0\0" && + dd if=$idx bs=1 skip=$(extended_table 2) + } >tmp && + mv tmp "$idx" && + clear_base && + test_must_fail git cat-file blob $object && + test_must_fail git index-pack --verify $pack +' + +test_expect_success 'bogus OFS_DELTA in packfile' ' + # Generate a pack with a delta in it. + base=$(test-genrandom foo 3000 | git hash-object --stdin -w) && + delta=$(test-genrandom foo 2000 | git hash-object --stdin -w) && + do_pack "$base $delta" --delta-base-offset && + rm -f .git/objects/??/* && + + # Double check that we have the delta we expect. + echo $base >expect && + echo $delta | git cat-file --batch-check="%(deltabase)" >actual && + test_cmp expect actual && + + # Now corrupt it. We assume the varint size for the delta is small + # enough to fit in the first byte (which it should be, since it + # is a pure deletion from the base), and that original ofs_delta + # takes 2 bytes (which it should, as it should be ~3000). + ofs=$(git show-index <$idx | grep $delta | cut -d" " -f1) && + munge $pack $(($ofs + 1)) "\177\377" && + test_must_fail git cat-file blob $delta >/dev/null +' + +test_done From 47fe3f6ef0f5a336db90d816c5fb4330ffa23668 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 25 Feb 2016 09:22:52 -0500 Subject: [PATCH 2/4] nth_packed_object_offset: bounds-check extended offset If a pack .idx file has a corrupted offset for an object, we may try to access an offset in the .idx or .pack file that is larger than the file's size. For the .pack case, we have use_pack() to protect us, which realizes the access is out of bounds. But if the corrupted value asks us to look in the .idx file's secondary 64-bit offset table, we blindly add it to the mmap'd index data and access arbitrary memory. We can fix this with a simple bounds-check compared to the size we found when we opened the .idx file. Note that there's similar code in index-pack that is triggered only during "index-pack --verify". To support both, we pull the bounds-check into a separate function, which dies when it sees a corrupted file. It would be nice if we could return an error, so that the pack code could try to find a good copy of the object elsewhere. Currently nth_packed_object_offset doesn't have any way to return an error, but it could probably use "0" as a sentinel value (since no object can start there). This is the minimal fix, and we can improve the resilience later on top. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 1 + cache.h | 10 ++++++++++ sha1_file.c | 15 +++++++++++++++ t/t5313-pack-bounds-checks.sh | 2 +- 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 723fe8e11..98bdbb5e6 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1445,6 +1445,7 @@ static void read_v2_anomalous_offsets(struct packed_git *p, if (!(off & 0x80000000)) continue; off = off & 0x7fffffff; + check_pack_index_ptr(p, &idx2[off * 2]); if (idx2[off * 2]) continue; /* diff --git a/cache.h b/cache.h index 4427945bc..6c9aaa1ae 100644 --- a/cache.h +++ b/cache.h @@ -1236,6 +1236,16 @@ extern void free_pack_by_name(const char *); extern void clear_delta_base_cache(void); extern struct packed_git *add_packed_git(const char *, int, int); +/* + * Make sure that a pointer access into an mmap'd index file is within bounds, + * and can provide at least 8 bytes of data. + * + * Note that this is only necessary for variable-length segments of the file + * (like the 64-bit extended offset table), as we compare the size to the + * fixed-length parts when we open the file. + */ +extern void check_pack_index_ptr(const struct packed_git *p, const void *ptr); + /* * Return the SHA-1 of the nth object within the specified packfile. * Open the index if it is not already open. The return value points diff --git a/sha1_file.c b/sha1_file.c index 99155c0d6..bd0f8f7c8 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2359,6 +2359,20 @@ const unsigned char *nth_packed_object_sha1(struct packed_git *p, } } +void check_pack_index_ptr(const struct packed_git *p, const void *vptr) +{ + const unsigned char *ptr = vptr; + const unsigned char *start = p->index_data; + const unsigned char *end = start + p->index_size; + if (ptr < start) + die("offset before start of pack index for %s (corrupt index?)", + p->pack_name); + /* No need to check for underflow; .idx files must be at least 8 bytes */ + if (ptr >= end - 8) + die("offset beyond end of pack index for %s (truncated index?)", + p->pack_name); +} + off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) { const unsigned char *index = p->index_data; @@ -2372,6 +2386,7 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) if (!(off & 0x80000000)) return off; index += p->num_objects * 4 + (off & 0x7fffffff) * 8; + check_pack_index_ptr(p, index); return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) | ntohl(*((uint32_t *)(index + 4))); } diff --git a/t/t5313-pack-bounds-checks.sh b/t/t5313-pack-bounds-checks.sh index efc519734..071774647 100755 --- a/t/t5313-pack-bounds-checks.sh +++ b/t/t5313-pack-bounds-checks.sh @@ -128,7 +128,7 @@ test_expect_success 'bogus object offset (v2, no msb)' ' test_must_fail git index-pack --verify $pack ' -test_expect_failure 'bogus offset into v2 extended table' ' +test_expect_success 'bogus offset into v2 extended table' ' do_pack $object --index-version=2 && munge $idx $(ofs_table 1) "\377\0\0\0" && clear_base && From 13e0b0d3dc76353632dcb0bc63cdf03426154317 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 25 Feb 2016 09:23:26 -0500 Subject: [PATCH 3/4] use_pack: handle signed off_t overflow A v2 pack index file can specify an offset within a packfile of up to 2^64-1 bytes. On a system with a signed 64-bit off_t, we can represent only up to 2^63-1. This means that a corrupted .idx file can end up with a negative offset in the pack code. Our bounds-checking use_pack function looks for too-large offsets, but not for ones that have wrapped around to negative. Let's do so, which fixes an out-of-bounds access demonstrated in t5313. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- sha1_file.c | 2 ++ t/t5313-pack-bounds-checks.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sha1_file.c b/sha1_file.c index bd0f8f7c8..4a3a032d5 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1041,6 +1041,8 @@ unsigned char *use_pack(struct packed_git *p, die("packfile %s cannot be accessed", p->pack_name); if (offset > (p->pack_size - 20)) die("offset beyond end of packfile (truncated pack?)"); + if (offset < 0) + die("offset before end of packfile (broken .idx?)"); if (!win || !in_window(win, offset)) { if (win) diff --git a/t/t5313-pack-bounds-checks.sh b/t/t5313-pack-bounds-checks.sh index 071774647..a8a587abc 100755 --- a/t/t5313-pack-bounds-checks.sh +++ b/t/t5313-pack-bounds-checks.sh @@ -136,7 +136,7 @@ test_expect_success 'bogus offset into v2 extended table' ' test_must_fail git index-pack --verify $pack ' -test_expect_failure 'bogus offset inside v2 extended table' ' +test_expect_success 'bogus offset inside v2 extended table' ' # We need two objects here, so we can plausibly require # an extended table (if the first object were larger than 2^31). do_pack "$object $(git rev-parse HEAD)" --index-version=2 && From 7465feba513a8bd3d47f27630ccc9ab7e82d916c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Sat, 27 Feb 2016 14:49:33 +0700 Subject: [PATCH 4/4] sha1_file.c: mark strings for translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- sha1_file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sha1_file.c b/sha1_file.c index 4a3a032d5..b8da68baa 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1042,7 +1042,7 @@ unsigned char *use_pack(struct packed_git *p, if (offset > (p->pack_size - 20)) die("offset beyond end of packfile (truncated pack?)"); if (offset < 0) - die("offset before end of packfile (broken .idx?)"); + die(_("offset before end of packfile (broken .idx?)")); if (!win || !in_window(win, offset)) { if (win) @@ -2367,11 +2367,11 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr) const unsigned char *start = p->index_data; const unsigned char *end = start + p->index_size; if (ptr < start) - die("offset before start of pack index for %s (corrupt index?)", + die(_("offset before start of pack index for %s (corrupt index?)"), p->pack_name); /* No need to check for underflow; .idx files must be at least 8 bytes */ if (ptr >= end - 8) - die("offset beyond end of pack index for %s (truncated index?)", + die(_("offset beyond end of pack index for %s (truncated index?)"), p->pack_name); }