Skip to content

Commit

Permalink
Merge branch 'nd/large-blobs'
Browse files Browse the repository at this point in the history
Teach a few codepaths to punt (instead of dying) when large blobs
that would not fit in core are involved in the operation.

* nd/large-blobs:
  diff: shortcut for diff'ing two binary SHA-1 objects
  diff --stat: mark any file larger than core.bigfilethreshold binary
  diff.c: allow to pass more flags to diff_populate_filespec
  sha1_file.c: do not die failing to malloc in unpack_compressed_entry
  wrapper.c: introduce gentle xmallocz that does not die()
  • Loading branch information
Junio C Hamano committed Sep 11, 2014
2 parents 08ad26a + 1aaf69e commit bedd3b4
Show file tree
Hide file tree
Showing 9 changed files with 125 additions and 37 deletions.
3 changes: 2 additions & 1 deletion Documentation/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,8 @@ core.bigFileThreshold::
Files larger than this size are stored deflated, without
attempting delta compression. Storing large files without
delta compression avoids excessive memory usage, at the
slight expense of increased disk usage.
slight expense of increased disk usage. Additionally files
larger than this size are always treated as binary.
+
Default is 512 MiB on all platforms. This should be reasonable
for most projects as source code and other text files can still
Expand Down
4 changes: 2 additions & 2 deletions Documentation/gitattributes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -440,8 +440,8 @@ Unspecified::

A path to which the `diff` attribute is unspecified
first gets its contents inspected, and if it looks like
text, it is treated as text. Otherwise it would
generate `Binary files differ`.
text and is smaller than core.bigFileThreshold, it is treated
as text. Otherwise it would generate `Binary files differ`.

String::

Expand Down
52 changes: 38 additions & 14 deletions diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ static unsigned long diff_filespec_size(struct diff_filespec *one)
{
if (!DIFF_FILE_VALID(one))
return 0;
diff_populate_filespec(one, 1);
diff_populate_filespec(one, CHECK_SIZE_ONLY);
return one->size;
}

Expand Down Expand Up @@ -1910,11 +1910,11 @@ static void show_dirstat(struct diff_options *options)
diff_free_filespec_data(p->one);
diff_free_filespec_data(p->two);
} else if (DIFF_FILE_VALID(p->one)) {
diff_populate_filespec(p->one, 1);
diff_populate_filespec(p->one, CHECK_SIZE_ONLY);
copied = added = 0;
diff_free_filespec_data(p->one);
} else if (DIFF_FILE_VALID(p->two)) {
diff_populate_filespec(p->two, 1);
diff_populate_filespec(p->two, CHECK_SIZE_ONLY);
copied = 0;
added = p->two->size;
diff_free_filespec_data(p->two);
Expand Down Expand Up @@ -2188,8 +2188,8 @@ int diff_filespec_is_binary(struct diff_filespec *one)
one->is_binary = one->driver->binary;
else {
if (!one->data && DIFF_FILE_VALID(one))
diff_populate_filespec(one, 0);
if (one->data)
diff_populate_filespec(one, CHECK_BINARY);
if (one->is_binary == -1 && one->data)
one->is_binary = buffer_is_binary(one->data,
one->size);
if (one->is_binary == -1)
Expand Down Expand Up @@ -2324,6 +2324,19 @@ static void builtin_diff(const char *name_a,
} else if (!DIFF_OPT_TST(o, TEXT) &&
( (!textconv_one && diff_filespec_is_binary(one)) ||
(!textconv_two && diff_filespec_is_binary(two)) )) {
if (!one->data && !two->data &&
S_ISREG(one->mode) && S_ISREG(two->mode) &&
!DIFF_OPT_TST(o, BINARY)) {
if (!hashcmp(one->sha1, two->sha1)) {
if (must_show_header)
fprintf(o->file, "%s", header.buf);
goto free_ab_and_return;
}
fprintf(o->file, "%s", header.buf);
fprintf(o->file, "%sBinary files %s and %s differ\n",
line_prefix, lbl[0], lbl[1]);
goto free_ab_and_return;
}
if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
die("unable to read files to diff");
/* Quite common confusing case */
Expand Down Expand Up @@ -2668,8 +2681,9 @@ static int diff_populate_gitlink(struct diff_filespec *s, int size_only)
* grab the data for the blob (or file) for our own in-core comparison.
* diff_filespec has data and size fields for this purpose.
*/
int diff_populate_filespec(struct diff_filespec *s, int size_only)
int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
{
int size_only = flags & CHECK_SIZE_ONLY;
int err = 0;
/*
* demote FAIL to WARN to allow inspecting the situation
Expand Down Expand Up @@ -2724,6 +2738,11 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
}
if (size_only)
return 0;
if ((flags & CHECK_BINARY) &&
s->size > big_file_threshold && s->is_binary == -1) {
s->is_binary = 1;
return 0;
}
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
Expand All @@ -2745,16 +2764,21 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
}
else {
enum object_type type;
if (size_only) {
if (size_only || (flags & CHECK_BINARY)) {
type = sha1_object_info(s->sha1, &s->size);
if (type < 0)
die("unable to read %s", sha1_to_hex(s->sha1));
} else {
s->data = read_sha1_file(s->sha1, &type, &s->size);
if (!s->data)
die("unable to read %s", sha1_to_hex(s->sha1));
s->should_free = 1;
if (size_only)
return 0;
if (s->size > big_file_threshold && s->is_binary == -1) {
s->is_binary = 1;
return 0;
}
}
s->data = read_sha1_file(s->sha1, &type, &s->size);
if (!s->data)
die("unable to read %s", sha1_to_hex(s->sha1));
s->should_free = 1;
}
return 0;
}
Expand Down Expand Up @@ -4688,8 +4712,8 @@ static int diff_filespec_check_stat_unmatch(struct diff_filepair *p)
!DIFF_FILE_VALID(p->two) ||
(p->one->sha1_valid && p->two->sha1_valid) ||
(p->one->mode != p->two->mode) ||
diff_populate_filespec(p->one, 1) ||
diff_populate_filespec(p->two, 1) ||
diff_populate_filespec(p->one, CHECK_SIZE_ONLY) ||
diff_populate_filespec(p->two, CHECK_SIZE_ONLY) ||
(p->one->size != p->two->size) ||
!diff_filespec_is_identical(p->one, p->two)) /* (2) */
p->skip_stat_unmatch_result = 1;
Expand Down
6 changes: 4 additions & 2 deletions diffcore-rename.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,11 @@ static int estimate_similarity(struct diff_filespec *src,
* is a possible size - we really should have a flag to
* say whether the size is valid or not!)
*/
if (!src->cnt_data && diff_populate_filespec(src, 1))
if (!src->cnt_data &&
diff_populate_filespec(src, CHECK_SIZE_ONLY))
return 0;
if (!dst->cnt_data && diff_populate_filespec(dst, 1))
if (!dst->cnt_data &&
diff_populate_filespec(dst, CHECK_SIZE_ONLY))
return 0;

max_size = ((src->size > dst->size) ? src->size : dst->size);
Expand Down
4 changes: 3 additions & 1 deletion diffcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ extern void free_filespec(struct diff_filespec *);
extern void fill_filespec(struct diff_filespec *, const unsigned char *,
int, unsigned short);

extern int diff_populate_filespec(struct diff_filespec *, int);
#define CHECK_SIZE_ONLY 1
#define CHECK_BINARY 2
extern int diff_populate_filespec(struct diff_filespec *, unsigned int);
extern void diff_free_filespec_data(struct diff_filespec *);
extern void diff_free_filespec_blob(struct diff_filespec *);
extern int diff_filespec_is_binary(struct diff_filespec *);
Expand Down
1 change: 1 addition & 0 deletions git-compat-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ extern try_to_free_t set_try_to_free_routine(try_to_free_t);
extern char *xstrdup(const char *str);
extern void *xmalloc(size_t size);
extern void *xmallocz(size_t size);
extern void *xmallocz_gently(size_t size);
extern void *xmemdupz(const void *data, size_t len);
extern char *xstrndup(const char *str, size_t len);
extern void *xrealloc(void *ptr, size_t size);
Expand Down
4 changes: 3 additions & 1 deletion sha1_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1923,7 +1923,9 @@ static void *unpack_compressed_entry(struct packed_git *p,
git_zstream stream;
unsigned char *buffer, *in;

buffer = xmallocz(size);
buffer = xmallocz_gently(size);
if (!buffer)
return NULL;
memset(&stream, 0, sizeof(stream));
stream.next_out = buffer;
stream.avail_out = size + 1;
Expand Down
20 changes: 20 additions & 0 deletions t/t1050-large.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,20 @@ test_expect_success 'diff --raw' '
git diff --raw HEAD^
'

test_expect_success 'diff --stat' '
git diff --stat HEAD^ HEAD
'

test_expect_success 'diff' '
git diff HEAD^ HEAD >actual &&
grep "Binary files.*differ" actual
'

test_expect_success 'diff --cached' '
git diff --cached HEAD^ >actual &&
grep "Binary files.*differ" actual
'

test_expect_success 'hash-object' '
git hash-object large1
'
Expand Down Expand Up @@ -163,4 +177,10 @@ test_expect_success 'zip achiving, deflate' '
git archive --format=zip HEAD >/dev/null
'

test_expect_success 'fsck' '
test_must_fail git fsck 2>err &&
n=$(grep "error: attempting to allocate .* over limit" err | wc -l) &&
test "$n" -gt 1
'

test_done
68 changes: 52 additions & 16 deletions wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,23 @@ static void do_nothing(size_t size)

static void (*try_to_free_routine)(size_t size) = do_nothing;

static void memory_limit_check(size_t size)
static int memory_limit_check(size_t size, int gentle)
{
static int limit = -1;
if (limit == -1) {
const char *env = getenv("GIT_ALLOC_LIMIT");
limit = env ? atoi(env) * 1024 : 0;
}
if (limit && size > limit)
die("attempting to allocate %"PRIuMAX" over limit %d",
(intmax_t)size, limit);
if (limit && size > limit) {
if (gentle) {
error("attempting to allocate %"PRIuMAX" over limit %d",
(intmax_t)size, limit);
return -1;
} else
die("attempting to allocate %"PRIuMAX" over limit %d",
(intmax_t)size, limit);
}
return 0;
}

try_to_free_t set_try_to_free_routine(try_to_free_t routine)
Expand All @@ -42,11 +49,12 @@ char *xstrdup(const char *str)
return ret;
}

void *xmalloc(size_t size)
static void *do_xmalloc(size_t size, int gentle)
{
void *ret;

memory_limit_check(size);
if (memory_limit_check(size, gentle))
return NULL;
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
Expand All @@ -55,26 +63,54 @@ void *xmalloc(size_t size)
ret = malloc(size);
if (!ret && !size)
ret = malloc(1);
if (!ret)
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
(unsigned long)size);
if (!ret) {
if (!gentle)
die("Out of memory, malloc failed (tried to allocate %lu bytes)",
(unsigned long)size);
else {
error("Out of memory, malloc failed (tried to allocate %lu bytes)",
(unsigned long)size);
return NULL;
}
}
}
#ifdef XMALLOC_POISON
memset(ret, 0xA5, size);
#endif
return ret;
}

void *xmallocz(size_t size)
void *xmalloc(size_t size)
{
return do_xmalloc(size, 0);
}

static void *do_xmallocz(size_t size, int gentle)
{
void *ret;
if (unsigned_add_overflows(size, 1))
die("Data too large to fit into virtual memory space.");
ret = xmalloc(size + 1);
((char*)ret)[size] = 0;
if (unsigned_add_overflows(size, 1)) {
if (gentle) {
error("Data too large to fit into virtual memory space.");
return NULL;
} else
die("Data too large to fit into virtual memory space.");
}
ret = do_xmalloc(size + 1, gentle);
if (ret)
((char*)ret)[size] = 0;
return ret;
}

void *xmallocz(size_t size)
{
return do_xmallocz(size, 0);
}

void *xmallocz_gently(size_t size)
{
return do_xmallocz(size, 1);
}

/*
* xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of
* "data" to the allocated memory, zero terminates the allocated memory,
Expand All @@ -96,7 +132,7 @@ void *xrealloc(void *ptr, size_t size)
{
void *ret;

memory_limit_check(size);
memory_limit_check(size, 0);
ret = realloc(ptr, size);
if (!ret && !size)
ret = realloc(ptr, 1);
Expand All @@ -115,7 +151,7 @@ void *xcalloc(size_t nmemb, size_t size)
{
void *ret;

memory_limit_check(size * nmemb);
memory_limit_check(size * nmemb, 0);
ret = calloc(nmemb, size);
if (!ret && (!nmemb || !size))
ret = calloc(1, 1);
Expand Down

0 comments on commit bedd3b4

Please sign in to comment.