From 051308f6e9cebeb76b8fb4f52b7e9e7ce064445c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 4 May 2006 16:51:44 -0700 Subject: [PATCH 1/4] binary patch. This adds "binary patch" to the diff output and teaches apply what to do with it. On the diff generation side, traditionally, we said "Binary files differ\n" without giving anything other than the preimage and postimage object names on the index line. This was good enough for applying a patch generated from your own repository (very useful while rebasing), because the postimage would be available in such a case. However, this was not useful when the recipient of such a patch via e-mail tried to apply it, even if the preimage was available. This patch allows the diff to generate a "binary" patch when operating under the --full-index option. The binary patch follows the usual extended git diff headers, and looks like this: "GIT binary patch\n" <length-byte><data>"\n" ... "\n" Each line is prefixed with a "length-byte", whose value is an upper- or lowercase letter that encodes the number of bytes that the data on the line decodes to (1..52 -- 'A' means 1, 'B' means 2, ..., 'Z' means 26, 'a' means 27, ...). <data> is 1 or more groups of 5-byte sequences, each of which encodes up to 4 bytes in base85 encoding. Because 52 / 4 * 5 = 65 and we have the length byte, an output line is capped at 66 characters. The payload is the same diff-delta as we use in the packfiles. On the consumption side, git-apply now can decode and apply the binary patch when --allow-binary-replacement is given, the diff was generated with --full-index, and the receiving repository has the preimage blob, which is the same condition that was always required when accepting a "Binary files differ\n" patch. 
Signed-off-by: Junio C Hamano --- Makefile | 2 +- apply.c | 232 +++++++++++++++++++++++++++++++++++++++++-------------- cache.h | 3 + diff.c | 106 +++++++++++++++++++++++-- 4 files changed, 275 insertions(+), 68 deletions(-) diff --git a/Makefile b/Makefile index 45484fcd8..814010d7b 100644 --- a/Makefile +++ b/Makefile @@ -205,7 +205,7 @@ DIFF_OBJS = \ diffcore-delta.o log-tree.o LIB_OBJS = \ - blob.o commit.o connect.o csum-file.o \ + blob.o commit.o connect.o csum-file.o base85.o \ date.o diff-delta.o entry.o exec_cmd.o ident.o index.o \ object.o pack-check.o patch-delta.o path.o pkt-line.o \ quote.o read-cache.o refs.o run-command.o \ diff --git a/apply.c b/apply.c index 269210a57..e37c4ebf5 100644 --- a/apply.c +++ b/apply.c @@ -10,6 +10,7 @@ #include "cache.h" #include "quote.h" #include "blob.h" +#include "delta.h" // --check turns on checking that the working tree matches the // files that are being modified, but doesn't apply the patch @@ -966,6 +967,70 @@ static inline int metadata_changes(struct patch *patch) patch->old_mode != patch->new_mode); } +static int parse_binary(char *buffer, unsigned long size, struct patch *patch) +{ + /* We have read "GIT binary patch\n"; what follows is a + * sequence of 'length-byte' followed by base-85 encoded + * delta data. + * + * Each 5-byte sequence of base-85 encodes up to 4 bytes, + * and we would limit the patch line to 66 characters, + * so one line can fit up to 13 groups that would decode + * to 52 bytes max. The length byte 'A'-'Z' corresponds + * to 1-26 bytes, and 'a'-'z' corresponds to 27-52 bytes. + * The end of binary is signalled with an empty line. 
+ */ + int llen, used; + struct fragment *fragment; + char *delta = NULL; + + patch->is_binary = 1; + patch->fragments = fragment = xcalloc(1, sizeof(*fragment)); + used = 0; + while (1) { + int byte_length, max_byte_length, newsize; + llen = linelen(buffer, size); + used += llen; + linenr++; + if (llen == 1) + break; + /* Minimum line is "A00000\n" which is 7-byte long, + * and the line length must be multiple of 5 plus 2. + */ + if ((llen < 7) || (llen-2) % 5) + goto corrupt; + max_byte_length = (llen - 2) / 5 * 4; + byte_length = *buffer; + if ('A' <= byte_length && byte_length <= 'Z') + byte_length = byte_length - 'A' + 1; + else if ('a' <= byte_length && byte_length <= 'z') + byte_length = byte_length - 'a' + 27; + else + goto corrupt; + /* if the input length was not multiple of 4, we would + * have filler at the end but the filler should never + * exceed 3 bytes + */ + if (max_byte_length < byte_length || + byte_length <= max_byte_length - 4) + goto corrupt; + newsize = fragment->size + byte_length; + delta = xrealloc(delta, newsize); + if (decode_85(delta + fragment->size, + buffer + 1, + byte_length)) + goto corrupt; + fragment->size = newsize; + buffer += llen; + size -= llen; + } + fragment->patch = delta; + return used; + corrupt: + return error("corrupt binary patch at line %d: %.*s", + linenr-1, llen-1, buffer); +} + static int parse_chunk(char *buffer, unsigned long size, struct patch *patch) { int hdrsize, patchsize; @@ -982,19 +1047,34 @@ static int parse_chunk(char *buffer, unsigned long size, struct patch *patch) "Files ", NULL, }; + static const char git_binary[] = "GIT binary patch\n"; int i; int hd = hdrsize + offset; unsigned long llen = linelen(buffer + hd, size - hd); - if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) + if (llen == sizeof(git_binary) - 1 && + !memcmp(git_binary, buffer + hd, llen)) { + int used; + linenr++; + used = parse_binary(buffer + hd + llen, + size - hd - llen, patch); + if (used) + patchsize = used + llen; + 
else + patchsize = 0; + } + else if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) { for (i = 0; binhdr[i]; i++) { int len = strlen(binhdr[i]); if (len < size - hd && !memcmp(binhdr[i], buffer + hd, len)) { + linenr++; patch->is_binary = 1; + patchsize = llen; break; } } + } /* Empty patch cannot be applied if: * - it is a binary patch and we do not do binary_replace, or @@ -1345,76 +1425,108 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) return offset; } -static int apply_fragments(struct buffer_desc *desc, struct patch *patch) +static int apply_binary(struct buffer_desc *desc, struct patch *patch) { - struct fragment *frag = patch->fragments; const char *name = patch->old_name ? patch->old_name : patch->new_name; + unsigned char sha1[20]; + unsigned char hdr[50]; + int hdrlen; - if (patch->is_binary) { - unsigned char sha1[20]; + if (!allow_binary_replacement) + return error("cannot apply binary patch to '%s' " + "without --allow-binary-replacement", + name); - if (!allow_binary_replacement) - return error("cannot apply binary patch to '%s' " - "without --allow-binary-replacement", - name); + /* For safety, we require patch index line to contain + * full 40-byte textual SHA1 for old and new, at least for now. + */ + if (strlen(patch->old_sha1_prefix) != 40 || + strlen(patch->new_sha1_prefix) != 40 || + get_sha1_hex(patch->old_sha1_prefix, sha1) || + get_sha1_hex(patch->new_sha1_prefix, sha1)) + return error("cannot apply binary patch to '%s' " + "without full index line", name); - /* For safety, we require patch index line to contain - * full 40-byte textual SHA1 for old and new, at least for now. + if (patch->old_name) { + /* See if the old one matches what the patch + * applies to. 
*/ - if (strlen(patch->old_sha1_prefix) != 40 || - strlen(patch->new_sha1_prefix) != 40 || - get_sha1_hex(patch->old_sha1_prefix, sha1) || - get_sha1_hex(patch->new_sha1_prefix, sha1)) - return error("cannot apply binary patch to '%s' " - "without full index line", name); - - if (patch->old_name) { - unsigned char hdr[50]; - int hdrlen; - - /* See if the old one matches what the patch - * applies to. - */ - write_sha1_file_prepare(desc->buffer, desc->size, - blob_type, sha1, hdr, &hdrlen); - if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix)) - return error("the patch applies to '%s' (%s), " - "which does not match the " - "current contents.", - name, sha1_to_hex(sha1)); - } - else { - /* Otherwise, the old one must be empty. */ - if (desc->size) - return error("the patch applies to an empty " - "'%s' but it is not empty", name); - } + write_sha1_file_prepare(desc->buffer, desc->size, + blob_type, sha1, hdr, &hdrlen); + if (strcmp(sha1_to_hex(sha1), patch->old_sha1_prefix)) + return error("the patch applies to '%s' (%s), " + "which does not match the " + "current contents.", + name, sha1_to_hex(sha1)); + } + else { + /* Otherwise, the old one must be empty. */ + if (desc->size) + return error("the patch applies to an empty " + "'%s' but it is not empty", name); + } + + if (desc->buffer) { + free(desc->buffer); + desc->alloc = desc->size = 0; + } + get_sha1_hex(patch->new_sha1_prefix, sha1); + if (!memcmp(sha1, null_sha1, 20)) + return 0; /* deletion patch */ + + if (has_sha1_file(sha1)) { + char type[10]; + unsigned long size; - /* For now, we do not record post-image data in the patch, - * and require the object already present in the recipient's - * object database. 
+ desc->buffer = read_sha1_file(sha1, type, &size); + if (!desc->buffer) + return error("the necessary postimage %s for " + "'%s' cannot be read", + patch->new_sha1_prefix, name); + desc->alloc = desc->size = size; + } + else { + char type[10]; + unsigned long src_size, dst_size; + void *src; + + get_sha1_hex(patch->old_sha1_prefix, sha1); + src = read_sha1_file(sha1, type, &src_size); + if (!src) + return error("the necessary preimage %s for " + "'%s' cannot be read", + patch->old_sha1_prefix, name); + + /* patch->fragment->patch has the delta data and + * we should apply it to the preimage. */ - if (desc->buffer) { - free(desc->buffer); - desc->alloc = desc->size = 0; - } - get_sha1_hex(patch->new_sha1_prefix, sha1); - - if (memcmp(sha1, null_sha1, 20)) { - char type[10]; - unsigned long size; - - desc->buffer = read_sha1_file(sha1, type, &size); - if (!desc->buffer) - return error("the necessary postimage %s for " - "'%s' does not exist", - patch->new_sha1_prefix, name); - desc->alloc = desc->size = size; - } + desc->buffer = patch_delta(src, src_size, + (void*) patch->fragments->patch, + patch->fragments->size, + &dst_size); + if (!desc->buffer) + return error("binary patch does not apply to '%s'", + name); + desc->size = desc->alloc = dst_size; - return 0; + /* verify that the result matches */ + write_sha1_file_prepare(desc->buffer, desc->size, blob_type, + sha1, hdr, &hdrlen); + if (strcmp(sha1_to_hex(sha1), patch->new_sha1_prefix)) + return error("binary patch to '%s' creates incorrect result", name); } + return 0; +} + +static int apply_fragments(struct buffer_desc *desc, struct patch *patch) +{ + struct fragment *frag = patch->fragments; + const char *name = patch->old_name ? 
patch->old_name : patch->new_name; + + if (patch->is_binary) + return apply_binary(desc, patch); + while (frag) { if (apply_one_fragment(desc, frag) < 0) return error("patch failed: %s:%ld", diff --git a/cache.h b/cache.h index 9d0ddcff0..2f32f3d62 100644 --- a/cache.h +++ b/cache.h @@ -363,4 +363,7 @@ extern int receive_keep_pack(int fd[2], const char *me, int quiet); /* pager.c */ extern void setup_pager(void); +/* base85 */ +int decode_85(char *dst, char *line, int linelen); + #endif /* CACHE_H */ diff --git a/diff.c b/diff.c index c845c8711..b14d897f1 100644 --- a/diff.c +++ b/diff.c @@ -8,6 +8,7 @@ #include "quote.h" #include "diff.h" #include "diffcore.h" +#include "delta.h" #include "xdiff-interface.h" static int use_size_cache; @@ -391,6 +392,90 @@ static void show_stats(struct diffstat_t* data) total_files, adds, dels); } +static void *encode_delta_size(void *data, unsigned long size) +{ + unsigned char *cp = data; + *cp++ = size; + size >>= 7; + while (size) { + cp[-1] |= 0x80; + *cp++ = size; + size >>= 7; + } + return cp; +} + +static void *safe_diff_delta(const unsigned char *src, unsigned long src_size, + const unsigned char *dst, unsigned long dst_size, + unsigned long *delta_size) +{ + unsigned long bufsize; + unsigned char *data; + unsigned char *cp; + + if (src_size && dst_size) + return diff_delta(src, src_size, dst, dst_size, delta_size, 0); + + /* diff-delta does not like to do delta with empty, so + * we do that by hand here. Sigh... + */ + + if (!src_size) + /* literal copy can be done only 127-byte at a time. + */ + bufsize = dst_size + (dst_size / 127) + 40; + else + bufsize = 40; + data = xmalloc(bufsize); + cp = encode_delta_size(data, src_size); + cp = encode_delta_size(cp, dst_size); + + if (dst_size) { + /* copy out literally */ + while (dst_size) { + int sz = (127 < dst_size) ? 
127 : dst_size; + *cp++ = sz; + dst_size -= sz; + while (sz) { + *cp++ = *dst++; + sz--; + } + } + } + *delta_size = (cp - data); + return data; +} + +static void emit_binary_diff(mmfile_t *one, mmfile_t *two) +{ + void *delta, *cp; + unsigned long delta_size; + + printf("GIT binary patch\n"); + delta = safe_diff_delta(one->ptr, one->size, + two->ptr, two->size, + &delta_size); + if (!delta) + die("unable to generate binary diff"); + + /* emit delta encoded in base85 */ + cp = delta; + while (delta_size) { + int bytes = (52 < delta_size) ? 52 : delta_size; + char line[70]; + delta_size -= bytes; + if (bytes <= 26) + line[0] = bytes + 'A' - 1; + else + line[0] = bytes - 26 + 'a' - 1; + encode_85(line + 1, cp, bytes); + cp += bytes; + puts(line); + } + printf("\n"); + free(delta); +} + #define FIRST_FEW_BYTES 8000 static int mmfile_is_binary(mmfile_t *mf) { @@ -407,6 +492,7 @@ static void builtin_diff(const char *name_a, struct diff_filespec *one, struct diff_filespec *two, const char *xfrm_msg, + struct diff_options *o, int complete_rewrite) { mmfile_t mf1, mf2; @@ -451,8 +537,13 @@ static void builtin_diff(const char *name_a, if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0) die("unable to read files to diff"); - if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) - printf("Binary files %s and %s differ\n", lbl[0], lbl[1]); + if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) { + if (o->full_index) + emit_binary_diff(&mf1, &mf2); + else + printf("Binary files %s and %s differ\n", + lbl[0], lbl[1]); + } else { /* Crazy xdl interfaces.. */ const char *diffopts = getenv("GIT_DIFF_OPTS"); @@ -928,6 +1019,7 @@ static void run_diff_cmd(const char *pgm, struct diff_filespec *one, struct diff_filespec *two, const char *xfrm_msg, + struct diff_options *o, int complete_rewrite) { if (pgm) { @@ -937,7 +1029,7 @@ static void run_diff_cmd(const char *pgm, } if (one && two) builtin_diff(name, other ? 
other : name, - one, two, xfrm_msg, complete_rewrite); + one, two, xfrm_msg, o, complete_rewrite); else printf("* Unmerged path %s\n", name); } @@ -971,7 +1063,7 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) if (DIFF_PAIR_UNMERGED(p)) { /* unmerged */ - run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, 0); + run_diff_cmd(pgm, p->one->path, NULL, NULL, NULL, NULL, o, 0); return; } @@ -1041,14 +1133,14 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o) * needs to be split into deletion and creation. */ struct diff_filespec *null = alloc_filespec(two->path); - run_diff_cmd(NULL, name, other, one, null, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, one, null, xfrm_msg, o, 0); free(null); null = alloc_filespec(one->path); - run_diff_cmd(NULL, name, other, null, two, xfrm_msg, 0); + run_diff_cmd(NULL, name, other, null, two, xfrm_msg, o, 0); free(null); } else - run_diff_cmd(pgm, name, other, one, two, xfrm_msg, + run_diff_cmd(pgm, name, other, one, two, xfrm_msg, o, complete_rewrite); free(name_munged); From 0660626caff6ac3745cd7b7908a2ca540141a2ec Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 5 May 2006 02:41:53 -0700 Subject: [PATCH 2/4] binary diff: further updates. This updates the user interface and generated diff data format. * "diff --binary" is used to signal that we want an e-mailable binary patch. It implies --full-index and -p. * "apply --allow-binary-replacement" acquired a short synonym "apply --binary". * After the "GIT binary patch\n" header line there is a token to record which binary patch mechanism was used, so that we can extend it later. Currently there are two mechanisms defined: "literal" and "delta". The former records the deflated postimage and the latter records the deflated delta from the preimage to postimage. 
For purely implementation convenience, I added the deflated length after these "literal/delta" tokens (otherwise the decoding side needs to guess and reallocate the buffer while inflating). Improvement patches are very welcomed. Signed-off-by: Junio C Hamano --- apply.c | 130 +++++++++++++++++++++++++++++++++++++++-------------- base85.c | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ cache.h | 1 + diff.c | 134 +++++++++++++++++++++++++++++-------------------------- diff.h | 1 + 5 files changed, 304 insertions(+), 96 deletions(-) create mode 100644 base85.c diff --git a/apply.c b/apply.c index e37c4ebf5..1b93aab8a 100644 --- a/apply.c +++ b/apply.c @@ -114,6 +114,9 @@ struct patch { char *new_name, *old_name, *def_name; unsigned int old_mode, new_mode; int is_rename, is_copy, is_new, is_delete, is_binary; +#define BINARY_DELTA_DEFLATED 1 +#define BINARY_LITERAL_DEFLATED 2 + unsigned long deflate_origlen; int lines_added, lines_deleted; int score; struct fragment *fragments; @@ -969,9 +972,11 @@ static inline int metadata_changes(struct patch *patch) static int parse_binary(char *buffer, unsigned long size, struct patch *patch) { - /* We have read "GIT binary patch\n"; what follows is a - * sequence of 'length-byte' followed by base-85 encoded - * delta data. + /* We have read "GIT binary patch\n"; what follows is a line + * that says the patch method (currently, either "deflated + * literal" or "deflated delta") and the length of data before + * deflating; a sequence of 'length-byte' followed by base-85 + * encoded data follows. 
* * Each 5-byte sequence of base-85 encodes up to 4 bytes, * and we would limit the patch line to 66 characters, @@ -982,11 +987,27 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) */ int llen, used; struct fragment *fragment; - char *delta = NULL; + char *data = NULL; - patch->is_binary = 1; patch->fragments = fragment = xcalloc(1, sizeof(*fragment)); - used = 0; + + /* Grab the type of patch */ + llen = linelen(buffer, size); + used = llen; + linenr++; + + if (!strncmp(buffer, "delta ", 6)) { + patch->is_binary = BINARY_DELTA_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 6, NULL, 10); + } + else if (!strncmp(buffer, "literal ", 8)) { + patch->is_binary = BINARY_LITERAL_DEFLATED; + patch->deflate_origlen = strtoul(buffer + 8, NULL, 10); + } + else + return error("unrecognized binary patch at line %d: %.*s", + linenr-1, llen-1, buffer); + buffer += llen; while (1) { int byte_length, max_byte_length, newsize; llen = linelen(buffer, size); @@ -1015,8 +1036,8 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) byte_length <= max_byte_length - 4) goto corrupt; newsize = fragment->size + byte_length; - delta = xrealloc(delta, newsize); - if (decode_85(delta + fragment->size, + data = xrealloc(data, newsize); + if (decode_85(data + fragment->size, buffer + 1, byte_length)) goto corrupt; @@ -1024,7 +1045,7 @@ static int parse_binary(char *buffer, unsigned long size, struct patch *patch) buffer += llen; size -= llen; } - fragment->patch = delta; + fragment->patch = data; return used; corrupt: return error("corrupt binary patch at line %d: %.*s", @@ -1425,6 +1446,61 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) return offset; } +static char *inflate_it(const void *data, unsigned long size, + unsigned long inflated_size) +{ + z_stream stream; + void *out; + int st; + + memset(&stream, 0, sizeof(stream)); + + stream.next_in = (unsigned char *)data; + stream.avail_in = 
size; + stream.next_out = out = xmalloc(inflated_size); + stream.avail_out = inflated_size; + inflateInit(&stream); + st = inflate(&stream, Z_FINISH); + if ((st != Z_STREAM_END) || stream.total_out != inflated_size) { + free(out); + return NULL; + } + return out; +} + +static int apply_binary_fragment(struct buffer_desc *desc, struct patch *patch) +{ + unsigned long dst_size; + struct fragment *fragment = patch->fragments; + void *data; + void *result; + + data = inflate_it(fragment->patch, fragment->size, + patch->deflate_origlen); + if (!data) + return error("corrupt patch data"); + switch (patch->is_binary) { + case BINARY_DELTA_DEFLATED: + result = patch_delta(desc->buffer, desc->size, + data, + patch->deflate_origlen, + &dst_size); + free(desc->buffer); + desc->buffer = result; + free(data); + break; + case BINARY_LITERAL_DEFLATED: + free(desc->buffer); + desc->buffer = data; + dst_size = patch->deflate_origlen; + break; + } + if (!desc->buffer) + return -1; + desc->size = desc->alloc = dst_size; + return 0; +} + static int apply_binary(struct buffer_desc *desc, struct patch *patch) { const char *name = patch->old_name ? 
patch->old_name : patch->new_name; @@ -1466,18 +1542,20 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch) "'%s' but it is not empty", name); } - if (desc->buffer) { + get_sha1_hex(patch->new_sha1_prefix, sha1); + if (!memcmp(sha1, null_sha1, 20)) { free(desc->buffer); desc->alloc = desc->size = 0; - } - get_sha1_hex(patch->new_sha1_prefix, sha1); - if (!memcmp(sha1, null_sha1, 20)) + desc->buffer = NULL; return 0; /* deletion patch */ + } if (has_sha1_file(sha1)) { + /* We already have the postimage */ char type[10]; unsigned long size; + free(desc->buffer); desc->buffer = read_sha1_file(sha1, type, &size); if (!desc->buffer) return error("the necessary postimage %s for " @@ -1486,28 +1564,13 @@ static int apply_binary(struct buffer_desc *desc, struct patch *patch) desc->alloc = desc->size = size; } else { - char type[10]; - unsigned long src_size, dst_size; - void *src; - - get_sha1_hex(patch->old_sha1_prefix, sha1); - src = read_sha1_file(sha1, type, &src_size); - if (!src) - return error("the necessary preimage %s for " - "'%s' cannot be read", - patch->old_sha1_prefix, name); - - /* patch->fragment->patch has the delta data and - * we should apply it to the preimage. + /* We have verified desc matches the preimage; + * apply the patch data to it, which is stored + * in the patch->fragments->{patch,size}. 
*/ - desc->buffer = patch_delta(src, src_size, - (void*) patch->fragments->patch, - patch->fragments->size, - &dst_size); - if (!desc->buffer) + if (apply_binary_fragment(desc, patch)) return error("binary patch does not apply to '%s'", name); - desc->size = desc->alloc = dst_size; /* verify that the result matches */ write_sha1_file_prepare(desc->buffer, desc->size, blob_type, @@ -2102,7 +2165,8 @@ int main(int argc, char **argv) diffstat = 1; continue; } - if (!strcmp(arg, "--allow-binary-replacement")) { + if (!strcmp(arg, "--allow-binary-replacement") || + !strcmp(arg, "--binary")) { allow_binary_replacement = 1; continue; } diff --git a/base85.c b/base85.c new file mode 100644 index 000000000..b97f7f933 --- /dev/null +++ b/base85.c @@ -0,0 +1,134 @@ +#include "cache.h" + +#undef DEBUG_85 + +#ifdef DEBUG_85 +#define say(a) fprintf(stderr, a) +#define say1(a,b) fprintf(stderr, a, b) +#define say2(a,b,c) fprintf(stderr, a, b, c) +#else +#define say(a) do {} while(0) +#define say1(a,b) do {} while(0) +#define say2(a,b,c) do {} while(0) +#endif + +static const char en85[] = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', + 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', + 'u', 'v', 'w', 'x', 'y', 'z', + '!', '#', '$', '%', '&', '(', ')', '*', '+', '-', + ';', '<', '=', '>', '?', '@', '^', '_', '`', '{', + '|', '}', '~' +}; + +static char de85[256]; +static void prep_base85(void) +{ + int i; + if (de85['Z']) + return; + for (i = 0; i < ARRAY_SIZE(en85); i++) { + int ch = en85[i]; + de85[ch] = i + 1; + } +} + +int decode_85(char *dst, char *buffer, int len) +{ + prep_base85(); + + say2("decode 85 <%.*s>", len/4*5, buffer); + while (len) { + unsigned acc = 0; + int cnt; + for (cnt = 0; cnt < 5; cnt++, buffer++) { + int ch = *((unsigned char *)buffer); + int de = de85[ch]; + 
if (!de) + return error("invalid base85 alphabet %c", ch); + de--; + if (cnt == 4) { + /* + * Detect overflow. The largest + * 5-letter possible is "|NsC0" to + * encode 0xffffffff, and "|NsC" gives + * 0x03030303 at this point (i.e. + * 0xffffffff = 0x03030303 * 85). + */ + if (0x03030303 < acc || + (0x03030303 == acc && de)) + error("invalid base85 sequence %.5s", + buffer-3); + } + acc = acc * 85 + de; + say1(" <%08x>", acc); + } + say1(" %08x", acc); + for (cnt = 0; cnt < 4 && len; cnt++, len--) { + *dst++ = (acc >> 24) & 0xff; + acc = acc << 8; + } + } + say("\n"); + + return 0; +} + +void encode_85(char *buf, unsigned char *data, int bytes) +{ + prep_base85(); + + say("encode 85"); + while (bytes) { + unsigned acc = 0; + int cnt; + for (cnt = 0; cnt < 4 && bytes; cnt++, bytes--) { + int ch = *data++; + acc |= ch << ((3-cnt)*8); + } + say1(" %08x", acc); + for (cnt = 0; cnt < 5; cnt++) { + int val = acc % 85; + acc /= 85; + buf[4-cnt] = en85[val]; + } + buf += 5; + } + say("\n"); + + *buf = 0; +} + +#ifdef DEBUG_85 +int main(int ac, char **av) +{ + char buf[1024]; + + if (!strcmp(av[1], "-e")) { + int len = strlen(av[2]); + encode_85(buf, av[2], len); + if (len <= 26) len = len + 'A' - 1; + else len = len + 'a' - 26 + 1; + printf("encoded: %c%s\n", len, buf); + return 0; + } + if (!strcmp(av[1], "-d")) { + int len = *av[2]; + if ('A' <= len && len <= 'Z') len = len - 'A' + 1; + else len = len - 'a' + 26 + 1; + decode_85(buf, av[2]+1, len); + printf("decoded: %.*s\n", len, buf); + return 0; + } + if (!strcmp(av[1], "-t")) { + char t[4] = { -1,-1,-1,-1 }; + encode_85(buf, t, 4); + printf("encoded: D%s\n", buf); + return 0; + } +} +#endif diff --git a/cache.h b/cache.h index 2f32f3d62..4b7a43925 100644 --- a/cache.h +++ b/cache.h @@ -365,5 +365,6 @@ extern void setup_pager(void); /* base85 */ int decode_85(char *dst, char *line, int linelen); +void encode_85(char *buf, unsigned char *data, int bytes); #endif /* CACHE_H */ diff --git a/diff.c b/diff.c index 
b14d897f1..bfe54c3e0 100644 --- a/diff.c +++ b/diff.c @@ -392,78 +392,78 @@ static void show_stats(struct diffstat_t* data) total_files, adds, dels); } -static void *encode_delta_size(void *data, unsigned long size) +static unsigned char *deflate_it(char *data, + unsigned long size, + unsigned long *result_size) { - unsigned char *cp = data; - *cp++ = size; - size >>= 7; - while (size) { - cp[-1] |= 0x80; - *cp++ = size; - size >>= 7; - } - return cp; + int bound; + unsigned char *deflated; + z_stream stream; + + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, Z_BEST_COMPRESSION); + bound = deflateBound(&stream, size); + deflated = xmalloc(bound); + stream.next_out = deflated; + stream.avail_out = bound; + + stream.next_in = (unsigned char *)data; + stream.avail_in = size; + while (deflate(&stream, Z_FINISH) == Z_OK) + ; /* nothing */ + deflateEnd(&stream); + *result_size = stream.total_out; + return deflated; } -static void *safe_diff_delta(const unsigned char *src, unsigned long src_size, - const unsigned char *dst, unsigned long dst_size, - unsigned long *delta_size) +static void emit_binary_diff(mmfile_t *one, mmfile_t *two) { - unsigned long bufsize; - unsigned char *data; - unsigned char *cp; - - if (src_size && dst_size) - return diff_delta(src, src_size, dst, dst_size, delta_size, 0); + void *cp; + void *delta; + void *deflated; + void *data; + unsigned long orig_size; + unsigned long delta_size; + unsigned long deflate_size; + unsigned long data_size; - /* diff-delta does not like to do delta with empty, so - * we do that by hand here. Sigh... + printf("GIT binary patch\n"); + /* We could do deflated delta, or we could do just deflated two, + * whichever is smaller. */ - - if (!src_size) - /* literal copy can be done only 127-byte at a time. 
- */ - bufsize = dst_size + (dst_size / 127) + 40; - else - bufsize = 40; - data = xmalloc(bufsize); - cp = encode_delta_size(data, src_size); - cp = encode_delta_size(cp, dst_size); - - if (dst_size) { - /* copy out literally */ - while (dst_size) { - int sz = (127 < dst_size) ? 127 : dst_size; - *cp++ = sz; - dst_size -= sz; - while (sz) { - *cp++ = *dst++; - sz--; - } + delta = NULL; + deflated = deflate_it(two->ptr, two->size, &deflate_size); + if (one->size && two->size) { + delta = diff_delta(one->ptr, one->size, + two->ptr, two->size, + &delta_size, deflate_size); + if (delta) { + void *to_free = delta; + orig_size = delta_size; + delta = deflate_it(delta, delta_size, &delta_size); + free(to_free); } } - *delta_size = (cp - data); - return data; -} -static void emit_binary_diff(mmfile_t *one, mmfile_t *two) -{ - void *delta, *cp; - unsigned long delta_size; + if (delta && delta_size < deflate_size) { + printf("delta %lu\n", orig_size); + free(deflated); + data = delta; + data_size = delta_size; + } + else { + printf("literal %lu\n", two->size); + free(delta); + data = deflated; + data_size = deflate_size; + } - printf("GIT binary patch\n"); - delta = safe_diff_delta(one->ptr, one->size, - two->ptr, two->size, - &delta_size); - if (!delta) - die("unable to generate binary diff"); - - /* emit delta encoded in base85 */ - cp = delta; - while (delta_size) { - int bytes = (52 < delta_size) ? 52 : delta_size; + /* emit data encoded in base85 */ + cp = data; + while (data_size) { + int bytes = (52 < data_size) ? 
52 : data_size; char line[70]; - delta_size -= bytes; + data_size -= bytes; if (bytes <= 26) line[0] = bytes + 'A' - 1; else @@ -473,7 +473,7 @@ static void emit_binary_diff(mmfile_t *one, mmfile_t *two) puts(line); } printf("\n"); - free(delta); + free(data); } #define FIRST_FEW_BYTES 8000 @@ -538,7 +538,11 @@ static void builtin_diff(const char *name_a, die("unable to read files to diff"); if (mmfile_is_binary(&mf1) || mmfile_is_binary(&mf2)) { - if (o->full_index) + /* Quite common confusing case */ + if (mf1.size == mf2.size && + !memcmp(mf1.ptr, mf2.ptr, mf1.size)) + goto free_ab_and_return; + if (o->binary) emit_binary_diff(&mf1, &mf2); else printf("Binary files %s and %s differ\n", @@ -1239,6 +1243,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) options->rename_limit = strtoul(arg+2, NULL, 10); else if (!strcmp(arg, "--full-index")) options->full_index = 1; + else if (!strcmp(arg, "--binary")) { + options->output_format = DIFF_FORMAT_PATCH; + options->full_index = options->binary = 1; + } else if (!strcmp(arg, "--name-only")) options->output_format = DIFF_FORMAT_NAME; else if (!strcmp(arg, "--name-status")) diff --git a/diff.h b/diff.h index b3b2c4dd2..d05260840 100644 --- a/diff.h +++ b/diff.h @@ -28,6 +28,7 @@ struct diff_options { with_raw:1, with_stat:1, tree_in_recursive:1, + binary:1, full_index:1, silent_on_remove:1, find_copies_harder:1; From 42d0ee8302c361a0e3bde7bc59858eda94bc13a4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 6 May 2006 00:15:54 -0700 Subject: [PATCH 3/4] binary diff and apply: testsuite. 
Signed-off-by: Junio C Hamano --- t/t4012-diff-binary.sh | 85 +++++++++++++++++++++++++++++++++++++++++ t/test4012.png | Bin 0 -> 5660 bytes 2 files changed, 85 insertions(+) create mode 100755 t/t4012-diff-binary.sh create mode 100644 t/test4012.png diff --git a/t/t4012-diff-binary.sh b/t/t4012-diff-binary.sh new file mode 100755 index 000000000..bdd95c0d3 --- /dev/null +++ b/t/t4012-diff-binary.sh @@ -0,0 +1,85 @@ +#!/bin/sh +# +# Copyright (c) 2006 Junio C Hamano +# + +test_description='Binary diff and apply +' + +. ./test-lib.sh + +test_expect_success 'prepare repository' \ + 'echo AIT >a && echo BIT >b && echo CIT >c && echo DIT >d && + git-update-index --add a b c d && + echo git >a && + cat ../test4012.png >b && + echo git >c && + cat b b >d' + +test_expect_success 'diff without --binary' \ + 'git-diff | git-apply --stat --summary >current && + cmp current - <<\EOF + a | 2 +- + b | Bin + c | 2 +- + d | Bin + 4 files changed, 2 insertions(+), 2 deletions(-) +EOF' + +test_expect_success 'diff with --binary' \ + 'git-diff --binary | git-apply --stat --summary >current && + cmp current - <<\EOF + a | 2 +- + b | Bin + c | 2 +- + d | Bin + 4 files changed, 2 insertions(+), 2 deletions(-) +EOF' + +# apply needs to be able to skip the binary material correctly +# in order to report the line number of a corrupt patch. 
+test_expect_success 'apply detecting corrupt patch correctly' \ + 'git-diff | sed -e 's/-CIT/xCIT/' >broken && + if git-apply --stat --summary broken 2>detected + then + echo unhappy - should have detected an error + (exit 1) + else + echo happy + fi && + detected=`cat detected` && + detected=`expr "$detected" : "fatal.*at line \\([0-9]*\\)\$"` && + detected=`sed -ne "${detected}p" broken` && + test "$detected" = xCIT' + +test_expect_success 'apply detecting corrupt patch correctly' \ + 'git-diff --binary | sed -e 's/-CIT/xCIT/' >broken && + if git-apply --stat --summary broken 2>detected + then + echo unhappy - should have detected an error + (exit 1) + else + echo happy + fi && + detected=`cat detected` && + detected=`expr "$detected" : "fatal.*at line \\([0-9]*\\)\$"` && + detected=`sed -ne "${detected}p" broken` && + test "$detected" = xCIT' + +test_expect_success 'initial commit' 'git-commit -a -m initial' + +# Try removal (b), modification (d), and creation (e). +test_expect_success 'diff-index with --binary' \ + 'echo AIT >a && mv b e && echo CIT >c && cat e >d && + git-update-index --add --remove a b c d e && + tree0=`git-write-tree` && + git-diff --cached --binary >current && + git-apply --stat --summary current' + +test_expect_success 'apply binary patch' \ + 'git-reset --hard && + git-apply --binary --index sqc34tLUx=XqS5RmRtNT#adw*x2I9+WOA_7_h002OwrmCoqmhk@u9uE2`GlnCC7C6=# zDvE$&03+ax@KXu}T7&PQYT^X|JZJfDVEBKL^+hXjz16goaObeGK-d!bT|z-<6`i+| zvA2S|tE-)xH$cJD&dS@)meJ4A+kx?wgB_!anwIXVEt?wvKoO>7>Bb1@wA( z;IT7g@ymCuJiLhbPfJr|i5H4;)mjji#JOK|U)hzPX8i5ho0BF*QXf6}{V_RmjX^h8 zBX9QX=3g&vi&AGE{oEKtLP%V(w6-2<2(l+h-nm!Hmw@v)rNU2(v7(ZQ1OQ+h$~Qc!!FTf57o zu@)h>i~DEs@uRKF6UyxAS&3G<*BiMjfLv_bQk;T##&v(4!pgG<+=YYE3cc(axf8D9 zEEhtoFt%jRU1M;}oLSBkd3UE+Bp6q6dcVEAvKKT8h56DsbkcsuTR<;ULeL+`fsARWL=kI0d{qqjDcA8beq&VqMECGM`@nbtHGW~M zVgSM};mmy>`nnA#(QCN?Baj(X%|*?D80!1maWLA8wjEi;G#0;OVGMoH)_Pcp$ery0 zjs>Q-gtJ$l@)T}w%&Z^$)Y2wNbTYowJmp2ztd9e|I_BXcEKR<=vvYxUi1R{eofw4( 
zMn>91(?+TULadq}xsDau-@@Y(1<-!VbRi6SK0dN5AE40;n;cI&kg4VgwFoy&V_kkw zMpno-g;(Q79&+8Y7g|7Mw8fTt*t^$t#3+Cmsj7Wxt-F|Tn&i!&)O9T_bFYg7{O{1~ zI_7r$E{te8LodhD-=41?TV6m!a2rR2BpX4tl@qt)kKy`LSmzZhS@=3dqH<2LSlS7G zqexi>4h67BIPL)zq|QpF%1Wk*b481+X8bG9OE<0Rs)8KvheMithBa8p@KJ! zGDzAwDsJz>&;~0Po|7fsG|CN~p2KI)lJFfeqeau#ui)mq*Mu1LEyUtt@jB~KexY3_c>~fyvfggniw}& zdnxRc9x^A~u&L>fk9SNr2W_yq%?ouL9vWra=|z6#ETbznso0QZ}oTY6UA({ zEKp{AT0f&U=JusDJ>4v2y3=YUWi8TF4+O--;H0HrapHBQb&xgm47GmLa9oPLK<}STSm3r&wP;{e>M5Rw?8m00o*DjrM^O}oz3{S)!bHrKzY z0mX3mj9w>xMA&MlpZ#5kr6mJqc$k~!{q)+=p8+H^IsKyP_yxT@-(3;)n>-V`S0HRb zlR^=$1Z8p$2-DBTEZtLK+Jcm4#FA*l=?r-&tiFI|%@DPtQGcF%BhI9jCYh~7Y8_3e zz*{EaCZ757z09w~MrgKXoqIprKzuDd2A%jVh2yU%YJ1XdAO@X5Aic;6&cdcQBDAw( zfwp;ITKp@*0Ecfb{qSjx^=2m9s`3^QxP7=J8L3(4Hy1k~8fi-nL|CpGHiJUNgWVX* z33>WWc7$DpSM{FI7{}rh2UR@~lR6n@Nu(SjKo3Oc3Rco%9Pw;4_y_MDEUA!kY4RT= zMD*$LpKv0(5ME{59VMJ8tZ{FW?Q+63(tz)d-MK#sR+wAGSV{53?*fq}R*dn?nEF#g zpQUJd=w!*_7A&r}2#~y$XuH`Pn?59m6Cf%ML**0R0b6KxHeF&gk$#f$jWerg-j#r` ziX4!S$h)0_asLcO!h;#k&=zOlbC|5s^-l!7<-}OZZ9^aA! 
zhrGGI8ooJ)Pz8peMO>osg~`oBb5J0xBb4@h=y1l7U9%b^2SYar^9#-f|;- z^CdSO22=U$bMhUi#i^@|=HK{U4Y!X#^Ho~>4Ji>#n_MG9hstW!va&0K!Xr81VzkRS zmq4hLdgQr}-WH1j&Z&Kt44MY>I($WF#~kF8Qh#N%dx+2~v62bG8HLfs`Q@Wb!GQ<; zKoi*a=U^a0MiOqfc%DDx^<)XT*j|VHm_-T1z%Tear)ig;#fs;m^VAFM&?!VPWbdOW zDy8emY;zOMaX!No3-%XYAIk#M#nQ{;e)_@V`R6m}+^`pP|9G$>og=}k^lFBzgtP7a6BF605y6thdiZ7al`>^Wwe%VkXs z9h>Dz^hXo%U@C=7Q-gQlAI4NwiN@p!H?}-w>kDLU4M@3EPj!-WYT6D;&>!G5?z9DI(4ac#n<6Bdi6IDKX&u9K7hKy>Bzf;OZ1syrvwr`4W(X`5`PL3IV zOeAwoaR^-2SRMcY8X6u|)m>be$Hhm!cj-H9=d7D`?HLc17`z=(EsXf;F$g!tej_uq zKDf1R433)i7*V^VZGE}6>D)Diu*{FKV{+&i2H#7)(d738 zVcEP2dTA6M`i<7>!d6;_B(A8TaLfw zcVQh*4VYp$-qMp#gxC)qhkL~cIz=(KB?WH1ghU@ol|x>TaFC*~MpqPwVst29%e?Dw zMF4~o`0fl9=w|Zl$VcyH#E_9!AwP}RjgBRkhiQ@~GbkSEjB`l$%|0EUe}Jx}G23xM z5(1j~;T+mlPbv+GBo=14t>@4@t9`20!)uI#DHmUcfQF3pzlM6e!-sO{g8D=Plw5Y~v zJ^5zy8&(4ae>PW6JWOVY&+d>7P|?VM9t!5ZOrmHOWH(GYk4->OkC@(92NG>4)4`oW zXXm_(wv(k4??xN^=WJ;6JKV2cj7dl+$TbB@aB%o(2b^dMp7l*QpoEXi^Qii|mc#w{ z{5lkx4p-W+v^Ir>sQ;}h;^lj@9?CRCI!gDj6{{J%{Qth-|Jnl#E#{rbi-Nx_uuXtE z<(}1UjeMj!<{$~~;Dv$Wc@Hr9hWzj4o*wE^Ac85csjmgY+>EsE$81!#Wma<9u(Xp2 z8KC%afHePiY^mR)0FHK&=eHNvQ5ljNm-V&^)R#^dl~<}AHz%s#g0T-0*H^LEVZ`fC zzc-5i;B@BvoXJe1ud^)By(ARd^JGqMTZk*aNrWWSy+~dlU9++p_7pu0N1Ix~?btlW zl>U3}4O&&jCr=BYSf%d!z@Ve@a|`=7hWWWa#@9!a$hR1Fe5!L4ym-YoMM{~)VnnF? zF9F8q<04MjWPUaN_PGaVy&&YqGt3HCrU}i)34{3-5L2+icbeiX1Zi1zj%|=8;yAJX zUT(r9S7nhfjfLBpQv@7HjT1N&Ent$no&E6winqTR&sYOU1J8El27`=%Cm4YnP6VQ= z<0E)}(P4#_Dr-60T^~IL2)!#LQvt~#DLl0+^YXg1+lc2%T(RsGi2jgR3+Ika~0*%RSSu<27&D`xup+6VyAzu7p0`gFH1UP8J%l>bfon-ap>^t z%{%oRXW{Sc?^oEnXv-~sc3o~s{17l%qU7M>(uYMN9k$qHzjgQTQX=@TK#SXId^(4Y zQ=kbTx3I3Mi9G1fG0nBGl3p=qd}UC`VP(0lzJ5e)?emWxKYo7jy$NxstgnBH_O61t zIb9UEz1pB$8}{`pHw9(21%j3#^iD87KAs)%=Ww>lxYL*K&*{4Rkn?b3e#C^spu7oj zTJ73@yB_shd58Pwc(D{;7yzQ6fHgN$w64FGqDlII=kUyX9|@be&J%Z6mZ%PG>4Q5? 
z*9Tleg}@jmT4=|XciU?@)|j=Y5vs@q87L?yRG!+8=MSL@X@sFE8+U}>etzP^!^66U zhGOHTJ3pGC6NNPgHCLZA4jBL8?T6g{fwi_$ArOeasA(OqL(`Egew~nS&hyX9uIsy# zw)KjqW9$&G`DHDRhB%5eL)NFJB8+**#)eh3X)}S8-|u{Qi5LVDUm)dYJ^S@FH9h^G(R+U5`nP8G(@!SW0+0AUWo4Cq z{tO%&8>?NHssSx+Z=n6~0ga`6nIKA|TC<*}i5CfeYXP`CMzxwXp!W~|_Vy1Bth#?^ zS-y`Gb)Nf$I+*ULOJ`N{igJIr`{&^5`ZMHuwfy$M=V+ufj}UQ zKdQ^8%XLaiOIdi3b=^1I#Prg!TLZ~9{{Gas#NhAYSh$V8oBjR$E7wRF8JUGnKmJ+^ z1cb|N=Gh!{G&@{{Aq4b<>N)DJS*WtHk!(HqlBmUXIXW|wGGEG1`_gA;XUBgdit21i zga5#yuC|t#A@~gA{l6V51J`#We60er=Kn#Wv=plj@rTy;ctNmoi62|%{fpn-w{FrX zq=Ip)8=YI{iP(I23nH_$s|zf9k4mr1-dP@~eLXa6wvuHZ~B#l~_tU;$;ppZ1$Mta*j@@`1pu^t*R0YEn1W3=H`|rhoJNR$<@_WMP1#s zQiZ(~GKPeqzdPjCdQnVB&BtABt;>AXmD_F^tqa}^2A7UKJA>9 zIXOA$nh1L>|PFfM4D!`PCYp zX3A~CK=0EBtH$R;?`Ss0pCYk1RL+-4FQ!=cP|K+jXcAq*5&?LNk)&m)B@J=9M~A<- zzQPM!;zThn))lr`<=+POSrlXZs^a_57MO|t?AH70n6rDyTAEe>8}%y+X#MC#Q9AE< zmC|QfI-ed)WT7g71*Z)J2VK&d6!n=8taU*pX6gw=0Qq;@kcWHj04FlpiMvyyy3K8l zc`eIc&wu+vI<;HKB_5DLI`6<}%$!97P?)NC_H4_N#Io!dcj!~#QuKLU41XT6@YK)X z7P_ik-m%^^BCF9V7|{-E$h|LX()h0YbH-&H_*XPzQr152yXy%4^lDe{C>6l^BvIS1 zZ?ZFV0ZCANBv;P!3M@CYDLvXH8mp@Y__qOQJ_hLp3xvZSfDd;8pnKVRqp<*5^i?L; wB)iFf!5~HcZ%ohvO_R*UYX6BwgGUmE)3jjAGZHKGr7b{BNn5e@<=cq=0b0Y+i2wiq literal 0 HcmV?d00001 From addaacab07d91e5ff1f06ada31c9e96c0edd31cd Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 8 May 2006 11:31:11 -0400 Subject: [PATCH 4/4] improve base85 generated assembly code This code is arguably pretty hot, if you use binary patches of course. This patch helps gcc generate both smaller and faster code especially in the error free path. 
Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano --- base85.c | 64 +++++++++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/base85.c b/base85.c index b97f7f933..a9e97f89d 100644 --- a/base85.c +++ b/base85.c @@ -44,34 +44,38 @@ int decode_85(char *dst, char *buffer, int len) say2("decode 85 <%.*s>", len/4*5, buffer); while (len) { unsigned acc = 0; - int cnt; - for (cnt = 0; cnt < 5; cnt++, buffer++) { - int ch = *((unsigned char *)buffer); - int de = de85[ch]; - if (!de) + int de, cnt = 4; + unsigned char ch; + do { + ch = *buffer++; + de = de85[ch]; + if (--de < 0) return error("invalid base85 alphabet %c", ch); - de--; - if (cnt == 4) { - /* - * Detect overflow. The largest - * 5-letter possible is "|NsC0" to - * encode 0xffffffff, and "|NsC" gives - * 0x03030303 at this point (i.e. - * 0xffffffff = 0x03030303 * 85). - */ - if (0x03030303 < acc || - (0x03030303 == acc && de)) - error("invalid base85 sequence %.5s", - buffer-3); - } acc = acc * 85 + de; - say1(" <%08x>", acc); - } + } while (--cnt); + ch = *buffer++; + de = de85[ch]; + if (--de < 0) + return error("invalid base85 alphabet %c", ch); + /* + * Detect overflow. The largest + * 5-letter possible is "|NsC0" to + * encode 0xffffffff, and "|NsC" gives + * 0x03030303 at this point (i.e. + * 0xffffffff = 0x03030303 * 85). + */ + if (0x03030303 < acc || + 0xffffffff - de < (acc *= 85)) + error("invalid base85 sequence %.5s", buffer-5); + acc += de; say1(" %08x", acc); - for (cnt = 0; cnt < 4 && len; cnt++, len--) { - *dst++ = (acc >> 24) & 0xff; - acc = acc << 8; - } + + cnt = (len < 4) ? 
len : 4; + len -= cnt; + do { + acc = (acc << 8) | (acc >> 24); + *dst++ = acc; + } while (--cnt); } say("\n"); @@ -86,15 +90,17 @@ void encode_85(char *buf, unsigned char *data, int bytes) while (bytes) { unsigned acc = 0; int cnt; - for (cnt = 0; cnt < 4 && bytes; cnt++, bytes--) { + for (cnt = 24; cnt >= 0; cnt -= 8) { int ch = *data++; - acc |= ch << ((3-cnt)*8); + acc |= ch << cnt; + if (--bytes == 0) + break; } say1(" %08x", acc); - for (cnt = 0; cnt < 5; cnt++) { + for (cnt = 4; cnt >= 0; cnt--) { int val = acc % 85; acc /= 85; - buf[4-cnt] = en85[val]; + buf[cnt] = en85[val]; } buf += 5; }