Skip to content

Commit

Permalink
Merge branch 'sp/mmap'
Browse files Browse the repository at this point in the history
* sp/mmap: (27 commits)
  Spell default packedgitlimit slightly differently
  Increase packedGit{Limit,WindowSize} on 64 bit systems.
  Update packedGit config option documentation.
  mmap: set FD_CLOEXEC for file descriptors we keep open for mmap()
  pack-objects: fix use of use_pack().
  Fix random segfaults in pack-objects.
  Cleanup read_cache_from error handling.
  Replace mmap with xmmap, better handling MAP_FAILED.
  Release pack windows before reporting out of memory.
  Default core.packdGitWindowSize to 1 MiB if NO_MMAP.
  Test suite for sliding window mmap implementation.
  Create pack_report() as a debugging aid.
  Support unmapping windows on 'temporary' packfiles.
  Improve error message when packfile mmap fails.
  Ensure core.packedGitWindowSize cannot be less than 2 pages.
  Load core configuration in git-verify-pack.
  Fully activate the sliding window pack access.
  Unmap individual windows rather than entire files.
  Document why header parsing won't exceed a window.
  Loop over pack_windows when inflating/accessing data.
  ...

Conflicts:

	cache.h
	pack-check.c
  • Loading branch information
Junio C Hamano committed Jan 7, 2007
2 parents e7bb17a + ecaebf4 commit cf2999e
Show file tree
Hide file tree
Showing 14 changed files with 582 additions and 231 deletions.
28 changes: 28 additions & 0 deletions Documentation/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,34 @@ core.legacyheaders::
database directly (where the "http://" and "rsync://" protocols
count as direct access).

core.packedGitWindowSize::
Number of bytes of a pack file to map into memory in a
single mapping operation. Larger window sizes may allow
your system to process a smaller number of large pack files
more quickly. Smaller window sizes will negatively affect
performance due to increased calls to the operating system's
memory manager, but may improve performance when accessing
a large number of large pack files.
+
Default is 1 MiB if NO_MMAP was set at compile time, otherwise 32
MiB on 32 bit platforms and 1 GiB on 64 bit platforms. This should
be reasonable for all users/operating systems. You probably do
not need to adjust this value.
+
Common unit suffixes of 'k', 'm', or 'g' are supported.

core.packedGitLimit::
Maximum number of bytes to map simultaneously into memory
from pack files. If Git needs to access more than this many
bytes at once to complete an operation it will unmap existing
regions to reclaim virtual address space within the process.
+
Default is 256 MiB on 32 bit platforms and 8 GiB on 64 bit platforms.
This should be reasonable for all users/operating systems, except on
the largest projects. You probably do not need to adjust this value.
+
Common unit suffixes of 'k', 'm', or 'g' are supported.

alias.*::
Command aliases for the gitlink:git[1] command wrapper - e.g.
after defining "alias.last = cat-file commit HEAD", the invocation
Expand Down
84 changes: 66 additions & 18 deletions builtin-pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,52 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha
* we are going to reuse the existing object data as is. make
* sure it is not corrupt.
*/
static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect)
static int check_pack_inflate(struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long len,
unsigned long expect)
{
z_stream stream;
unsigned char fakebuf[4096], *in;
int st;

memset(&stream, 0, sizeof(stream));
inflateInit(&stream);
do {
in = use_pack(p, w_curs, offset, &stream.avail_in);
stream.next_in = in;
stream.next_out = fakebuf;
stream.avail_out = sizeof(fakebuf);
st = inflate(&stream, Z_FINISH);
offset += stream.next_in - in;
} while (st == Z_OK || st == Z_BUF_ERROR);
inflateEnd(&stream);
return (st == Z_STREAM_END &&
stream.total_out == expect &&
stream.total_in == len) ? 0 : -1;
}

static void copy_pack_data(struct sha1file *f,
struct packed_git *p,
struct pack_window **w_curs,
unsigned long offset,
unsigned long len)
{
unsigned char *in;
unsigned int avail;

while (len) {
in = use_pack(p, w_curs, offset, &avail);
if (avail > len)
avail = len;
sha1write(f, in, avail);
offset += avail;
len -= avail;
}
}

static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect)
{
z_stream stream;
unsigned char fakebuf[4096];
Expand Down Expand Up @@ -323,7 +368,7 @@ static int revalidate_loose_object(struct object_entry *entry,
return -1;
map += used;
mapsize -= used;
return check_inflate(map, mapsize, size);
return check_loose_inflate(map, mapsize, size);
}

static unsigned long write_object(struct sha1file *f,
Expand Down Expand Up @@ -416,6 +461,8 @@ static unsigned long write_object(struct sha1file *f,
}
else {
struct packed_git *p = entry->in_pack;
struct pack_window *w_curs = NULL;
unsigned long offset;

if (entry->delta) {
obj_type = (allow_ofs_delta && entry->delta->offset) ?
Expand All @@ -437,16 +484,14 @@ static unsigned long write_object(struct sha1file *f,
hdrlen += 20;
}

use_packed_git(p);
buf = (char *) p->pack_base
+ entry->in_pack_offset
+ entry->in_pack_header_size;
offset = entry->in_pack_offset + entry->in_pack_header_size;
datalen = find_packed_object_size(p, entry->in_pack_offset)
- entry->in_pack_header_size;
if (!pack_to_stdout && check_inflate(buf, datalen, entry->size))
if (!pack_to_stdout && check_pack_inflate(p, &w_curs,
offset, datalen, entry->size))
die("corrupt delta in pack %s", sha1_to_hex(entry->sha1));
sha1write(f, buf, datalen);
unuse_packed_git(p);
copy_pack_data(f, p, &w_curs, offset, datalen);
unuse_pack(&w_curs);
reused++;
}
if (entry->delta)
Expand Down Expand Up @@ -937,22 +982,19 @@ static void check_object(struct object_entry *entry)

if (entry->in_pack && !entry->preferred_base) {
struct packed_git *p = entry->in_pack;
struct pack_window *w_curs = NULL;
unsigned long left = p->pack_size - entry->in_pack_offset;
unsigned long size, used;
unsigned char *buf;
struct object_entry *base_entry = NULL;

use_packed_git(p);
buf = p->pack_base;
buf += entry->in_pack_offset;
buf = use_pack(p, &w_curs, entry->in_pack_offset, NULL);

/* We want in_pack_type even if we do not reuse delta.
* There is no point not reusing non-delta representations.
*/
used = unpack_object_header_gently(buf, left,
&entry->in_pack_type, &size);
if (!used || left - used <= 20)
die("corrupt pack for %s", sha1_to_hex(entry->sha1));

/* Check if it is delta, and the base is also an object
* we are going to pack. If so we will reuse the existing
Expand All @@ -961,36 +1003,42 @@ static void check_object(struct object_entry *entry)
if (!no_reuse_delta) {
unsigned char c, *base_name;
unsigned long ofs;
unsigned long used_0;
/* there is at least 20 bytes left in the pack */
switch (entry->in_pack_type) {
case OBJ_REF_DELTA:
base_name = buf + used;
base_name = use_pack(p, &w_curs,
entry->in_pack_offset + used, NULL);
used += 20;
break;
case OBJ_OFS_DELTA:
c = buf[used++];
buf = use_pack(p, &w_curs,
entry->in_pack_offset + used, NULL);
used_0 = 0;
c = buf[used_0++];
ofs = c & 127;
while (c & 128) {
ofs += 1;
if (!ofs || ofs & ~(~0UL >> 7))
die("delta base offset overflow in pack for %s",
sha1_to_hex(entry->sha1));
c = buf[used++];
c = buf[used_0++];
ofs = (ofs << 7) + (c & 127);
}
if (ofs >= entry->in_pack_offset)
die("delta base offset out of bound for %s",
sha1_to_hex(entry->sha1));
ofs = entry->in_pack_offset - ofs;
base_name = find_packed_object_name(p, ofs);
used += used_0;
break;
default:
base_name = NULL;
}
if (base_name)
base_entry = locate_object_entry(base_name);
}
unuse_packed_git(p);
unuse_pack(&w_curs);
entry->in_pack_header_size = used;

if (base_entry) {
Expand Down
1 change: 1 addition & 0 deletions builtin-verify-pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix)
int no_more_options = 0;
int nothing_done = 1;

git_config(git_default_config);
while (1 < argc) {
if (!no_more_options && argv[1][0] == '-') {
if (!strcmp("-v", argv[1]))
Expand Down
28 changes: 20 additions & 8 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,8 @@ extern int warn_ambiguous_refs;
extern int shared_repository;
extern const char *apply_default_whitespace;
extern int zlib_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;

#define GIT_REPO_VERSION 0
extern int repository_format_version;
Expand Down Expand Up @@ -336,14 +338,22 @@ extern struct alternate_object_database {
} *alt_odb_list;
extern void prepare_alt_odb(void);

struct pack_window {
struct pack_window *next;
unsigned char *base;
off_t offset;
size_t len;
unsigned int last_used;
unsigned int inuse_cnt;
};

extern struct packed_git {
struct packed_git *next;
unsigned long index_size;
unsigned long pack_size;
struct pack_window *windows;
unsigned int *index_base;
void *pack_base;
unsigned int pack_last_used;
unsigned int pack_use_cnt;
off_t index_size;
off_t pack_size;
int pack_fd;
int pack_local;
unsigned char sha1[20];
/* something like ".git/objects/pack/xxxxx.pack" */
Expand Down Expand Up @@ -389,13 +399,14 @@ extern void install_packed_git(struct packed_git *pack);
extern struct packed_git *find_sha1_pack(const unsigned char *sha1,
struct packed_git *packs);

extern int use_packed_git(struct packed_git *);
extern void unuse_packed_git(struct packed_git *);
extern void pack_report();
extern unsigned char* use_pack(struct packed_git *, struct pack_window **, unsigned long, unsigned int *);
extern void unuse_pack(struct pack_window **);
extern struct packed_git *add_packed_git(char *, int, int);
extern int num_packed_objects(const struct packed_git *p);
extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*);
extern unsigned long find_pack_entry_one(const unsigned char *, struct packed_git *);
extern void *unpack_entry_gently(struct packed_git *, unsigned long, char *, unsigned long *);
extern void *unpack_entry(struct packed_git *, unsigned long, char *, unsigned long *);
extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern void packed_object_info_detail(struct packed_git *, unsigned long, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *);

Expand All @@ -421,6 +432,7 @@ extern char *git_commit_encoding;
extern char *git_log_output_encoding;

extern int copy_fd(int ifd, int ofd);
extern void read_or_die(int fd, void *buf, size_t count);
extern int write_in_full(int fd, const void *buf, size_t count, const char *);
extern void write_or_die(int fd, const void *buf, size_t count);
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
Expand Down
17 changes: 16 additions & 1 deletion config.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,21 @@ int git_default_config(const char *var, const char *value)
return 0;
}

if (!strcmp(var, "core.packedgitwindowsize")) {
int pgsz = getpagesize();
packed_git_window_size = git_config_int(var, value);
packed_git_window_size /= pgsz;
if (packed_git_window_size < 2)
packed_git_window_size = 2;
packed_git_window_size *= pgsz;
return 0;
}

if (!strcmp(var, "core.packedgitlimit")) {
packed_git_limit = git_config_int(var, value);
return 0;
}

if (!strcmp(var, "user.name")) {
strlcpy(git_default_name, value, sizeof(git_default_name));
return 0;
Expand Down Expand Up @@ -695,7 +710,7 @@ int git_config_set_multivar(const char* key, const char* value,
}

fstat(in_fd, &st);
contents = mmap(NULL, st.st_size, PROT_READ,
contents = xmmap(NULL, st.st_size, PROT_READ,
MAP_PRIVATE, in_fd, 0);
close(in_fd);

Expand Down
4 changes: 1 addition & 3 deletions diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -1341,10 +1341,8 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
fd = open(s->path, O_RDONLY);
if (fd < 0)
goto err_empty;
s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
s->data = xmmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (s->data == MAP_FAILED)
goto err_empty;
s->should_munmap = 1;
}
else {
Expand Down
2 changes: 2 additions & 0 deletions environment.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ char *git_log_output_encoding;
int shared_repository = PERM_UMASK;
const char *apply_default_whitespace;
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
int pager_in_use;
int pager_use_color = 1;

Expand Down
Loading

0 comments on commit cf2999e

Please sign in to comment.