Skip to content

Commit

Permalink
Merge branch 'nd/stream-more'
Browse files Browse the repository at this point in the history
Use API to read blob data in smaller chunks in more places to reduce the
memory footprint.

By Nguyễn Thái Ngọc Duy (6) and Junio C Hamano (1)
* nd/stream-more:
  update-server-info: respect core.bigfilethreshold
  fsck: use streaming API for writing lost-found blobs
  show: use streaming API for showing blobs
  parse_object: avoid putting whole blob in core
  cat-file: use streaming API to print blobs
  Add more large blob test cases
  streaming: make streaming-write-entry to be more reusable
  • Loading branch information
Junio C Hamano committed Apr 16, 2012
2 parents 30fd3a5 + da591a7 commit 47de6b0
Show file tree
Hide file tree
Showing 11 changed files with 221 additions and 75 deletions.
25 changes: 25 additions & 0 deletions builtin/cat-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "parse-options.h"
#include "diff.h"
#include "userdiff.h"
#include "streaming.h"

#define BATCH 1
#define BATCH_CHECK 2
Expand Down Expand Up @@ -127,6 +128,8 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
return cmd_ls_tree(2, ls_args, NULL);
}

if (type == OBJ_BLOB)
return stream_blob_to_fd(1, sha1, NULL, 0);
buf = read_sha1_file(sha1, &type, &size);
if (!buf)
die("Cannot read object %s", obj_name);
Expand All @@ -149,6 +152,28 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
break;

case 0:
if (type_from_string(exp_type) == OBJ_BLOB) {
unsigned char blob_sha1[20];
if (sha1_object_info(sha1, NULL) == OBJ_TAG) {
enum object_type type;
unsigned long size;
char *buffer = read_sha1_file(sha1, &type, &size);
if (memcmp(buffer, "object ", 7) ||
get_sha1_hex(buffer + 7, blob_sha1))
die("%s not a valid tag", sha1_to_hex(sha1));
free(buffer);
} else
hashcpy(blob_sha1, sha1);

if (sha1_object_info(blob_sha1, NULL) == OBJ_BLOB)
return stream_blob_to_fd(1, blob_sha1, NULL, 0);
/*
* we attempted to dereference a tag to a blob
* and failed; there may be new dereference
* mechanisms this code is not aware of.
* fall-back to the usual case.
*/
}
buf = read_object_with_reference(sha1, exp_type, &size, NULL);
break;

Expand Down
8 changes: 2 additions & 6 deletions builtin/fsck.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "parse-options.h"
#include "dir.h"
#include "progress.h"
#include "streaming.h"

#define REACHABLE 0x0001
#define SEEN 0x0002
Expand Down Expand Up @@ -238,13 +239,8 @@ static void check_unreachable_object(struct object *obj)
if (!(f = fopen(filename, "w")))
die_errno("Could not open '%s'", filename);
if (obj->type == OBJ_BLOB) {
enum object_type type;
unsigned long size;
char *buf = read_sha1_file(obj->sha1,
&type, &size);
if (buf && fwrite(buf, 1, size, f) != size)
if (stream_blob_to_fd(fileno(f), obj->sha1, NULL, 1))
die_errno("Could not write '%s'", filename);
free(buf);
} else
fprintf(f, "%s\n", sha1_to_hex(obj->sha1));
if (fclose(f))
Expand Down
34 changes: 20 additions & 14 deletions builtin/log.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "string-list.h"
#include "parse-options.h"
#include "branch.h"
#include "streaming.h"

/* Set a default date-time format for git log ("log.date" config variable) */
static const char *default_date_mode = NULL;
Expand Down Expand Up @@ -383,8 +384,13 @@ static void show_tagger(char *buf, int len, struct rev_info *rev)
strbuf_release(&out);
}

static int show_object(const unsigned char *sha1, int show_tag_object,
struct rev_info *rev)
static int show_blob_object(const unsigned char *sha1, struct rev_info *rev)
{
fflush(stdout);
return stream_blob_to_fd(1, sha1, NULL, 0);
}

static int show_tag_object(const unsigned char *sha1, struct rev_info *rev)
{
unsigned long size;
enum object_type type;
Expand All @@ -394,16 +400,16 @@ static int show_object(const unsigned char *sha1, int show_tag_object,
if (!buf)
return error(_("Could not read object %s"), sha1_to_hex(sha1));

if (show_tag_object)
while (offset < size && buf[offset] != '\n') {
int new_offset = offset + 1;
while (new_offset < size && buf[new_offset++] != '\n')
; /* do nothing */
if (!prefixcmp(buf + offset, "tagger "))
show_tagger(buf + offset + 7,
new_offset - offset - 7, rev);
offset = new_offset;
}
assert(type == OBJ_TAG);
while (offset < size && buf[offset] != '\n') {
int new_offset = offset + 1;
while (new_offset < size && buf[new_offset++] != '\n')
; /* do nothing */
if (!prefixcmp(buf + offset, "tagger "))
show_tagger(buf + offset + 7,
new_offset - offset - 7, rev);
offset = new_offset;
}

if (offset < size)
fwrite(buf + offset, size - offset, 1, stdout);
Expand Down Expand Up @@ -463,7 +469,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
const char *name = objects[i].name;
switch (o->type) {
case OBJ_BLOB:
ret = show_object(o->sha1, 0, NULL);
ret = show_blob_object(o->sha1, NULL);
break;
case OBJ_TAG: {
struct tag *t = (struct tag *)o;
Expand All @@ -474,7 +480,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
diff_get_color_opt(&rev.diffopt, DIFF_COMMIT),
t->tag,
diff_get_color_opt(&rev.diffopt, DIFF_RESET));
ret = show_object(o->sha1, 1, &rev);
ret = show_tag_object(o->sha1, &rev);
rev.shown_one = 1;
if (ret)
break;
Expand Down
1 change: 1 addition & 0 deletions builtin/update-server-info.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ int cmd_update_server_info(int argc, const char **argv, const char *prefix)
OPT_END()
};

git_config(git_default_config, NULL);
argc = parse_options(argc, argv, prefix, options,
update_server_info_usage, 0);
if (argc > 0)
Expand Down
53 changes: 5 additions & 48 deletions entry.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,58 +120,15 @@ static int streaming_write_entry(struct cache_entry *ce, char *path,
const struct checkout *state, int to_tempfile,
int *fstat_done, struct stat *statbuf)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
int result = -1;
ssize_t kept = 0;
int fd = -1;

st = open_istream(ce->sha1, &type, &sz, filter);
if (!st)
return -1;
if (type != OBJ_BLOB)
goto close_and_exit;
int fd;

fd = open_output_fd(path, ce, to_tempfile);
if (fd < 0)
goto close_and_exit;

for (;;) {
char buf[1024 * 16];
ssize_t wrote, holeto;
ssize_t readlen = read_istream(st, buf, sizeof(buf));

if (!readlen)
break;
if (sizeof(buf) == readlen) {
for (holeto = 0; holeto < readlen; holeto++)
if (buf[holeto])
break;
if (readlen == holeto) {
kept += holeto;
continue;
}
}

if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
goto close_and_exit;
else
kept = 0;
wrote = write_in_full(fd, buf, readlen);

if (wrote != readlen)
goto close_and_exit;
}
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
write(fd, "", 1) != 1))
goto close_and_exit;
*fstat_done = fstat_output(fd, state, statbuf);

close_and_exit:
close_istream(st);
if (0 <= fd)
if (0 <= fd) {
result = stream_blob_to_fd(fd, ce->sha1, filter, 1);
*fstat_done = fstat_output(fd, state, statbuf);
result = close(fd);
}
if (result && 0 <= fd)
unlink(path);
return result;
Expand Down
11 changes: 11 additions & 0 deletions object.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,17 @@ struct object *parse_object(const unsigned char *sha1)
if (obj && obj->parsed)
return obj;

if ((obj && obj->type == OBJ_BLOB) ||
(!obj && has_sha1_file(sha1) &&
sha1_object_info(sha1, NULL) == OBJ_BLOB)) {
if (check_sha1_signature(repl, NULL, 0, NULL) < 0) {
error("sha1 mismatch %s\n", sha1_to_hex(repl));
return NULL;
}
parse_blob_buffer(lookup_blob(sha1), NULL, 0);
return lookup_object(sha1);
}

buffer = read_sha1_file(sha1, &type, &size);
if (buffer) {
if (check_sha1_signature(repl, buffer, size, typename(type)) < 0) {
Expand Down
42 changes: 40 additions & 2 deletions sha1_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "pack-revindex.h"
#include "sha1-lookup.h"
#include "bulk-checkin.h"
#include "streaming.h"

#ifndef O_NOATIME
#if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
Expand Down Expand Up @@ -1146,10 +1147,47 @@ static const struct packed_git *has_packed_and_bad(const unsigned char *sha1)
return NULL;
}

int check_sha1_signature(const unsigned char *sha1, void *map, unsigned long size, const char *type)
/*
* With an in-core object data in "map", rehash it to make sure the
* object name actually matches "sha1" to detect object corruption.
* With "map" == NULL, try reading the object named with "sha1" using
* the streaming interface and rehash it to do the same.
*/
int check_sha1_signature(const unsigned char *sha1, void *map,
unsigned long size, const char *type)
{
unsigned char real_sha1[20];
hash_sha1_file(map, size, type, real_sha1);
enum object_type obj_type;
struct git_istream *st;
git_SHA_CTX c;
char hdr[32];
int hdrlen;

if (map) {
hash_sha1_file(map, size, type, real_sha1);
return hashcmp(sha1, real_sha1) ? -1 : 0;
}

st = open_istream(sha1, &obj_type, &size, NULL);
if (!st)
return -1;

/* Generate the header */
hdrlen = sprintf(hdr, "%s %lu", typename(obj_type), size) + 1;

/* Sha1.. */
git_SHA1_Init(&c);
git_SHA1_Update(&c, hdr, hdrlen);
for (;;) {
char buf[1024 * 16];
ssize_t readlen = read_istream(st, buf, sizeof(buf));

if (!readlen)
break;
git_SHA1_Update(&c, buf, readlen);
}
git_SHA1_Final(real_sha1, &c);
close_istream(st);
return hashcmp(sha1, real_sha1) ? -1 : 0;
}

Expand Down
55 changes: 55 additions & 0 deletions streaming.c
Original file line number Diff line number Diff line change
Expand Up @@ -489,3 +489,58 @@ static open_method_decl(incore)

return st->u.incore.buf ? 0 : -1;
}


/****************************************************************
* Users of streaming interface
****************************************************************/

int stream_blob_to_fd(int fd, unsigned const char *sha1, struct stream_filter *filter,
int can_seek)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
ssize_t kept = 0;
int result = -1;

st = open_istream(sha1, &type, &sz, filter);
if (!st)
return result;
if (type != OBJ_BLOB)
goto close_and_exit;
for (;;) {
char buf[1024 * 16];
ssize_t wrote, holeto;
ssize_t readlen = read_istream(st, buf, sizeof(buf));

if (!readlen)
break;
if (can_seek && sizeof(buf) == readlen) {
for (holeto = 0; holeto < readlen; holeto++)
if (buf[holeto])
break;
if (readlen == holeto) {
kept += holeto;
continue;
}
}

if (kept && lseek(fd, kept, SEEK_CUR) == (off_t) -1)
goto close_and_exit;
else
kept = 0;
wrote = write_in_full(fd, buf, readlen);

if (wrote != readlen)
goto close_and_exit;
}
if (kept && (lseek(fd, kept - 1, SEEK_CUR) == (off_t) -1 ||
write(fd, "", 1) != 1))
goto close_and_exit;
result = 0;

close_and_exit:
close_istream(st);
return result;
}
2 changes: 2 additions & 0 deletions streaming.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ extern struct git_istream *open_istream(const unsigned char *, enum object_type
extern int close_istream(struct git_istream *);
extern ssize_t read_istream(struct git_istream *, char *, size_t);

extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek);

#endif /* STREAMING_H */
Loading

0 comments on commit 47de6b0

Please sign in to comment.