Skip to content

Commit

Permalink
archive-zip: mark text files in archives
Browse files Browse the repository at this point in the history
Set the text flag for ZIP archive entries that look like text files so
that unzip -a can be used to perform end-of-line conversions.  Info-ZIP
zip does the same.

Detect binary files the same way as git diff and git grep do, namely by
checking for the attribute "diff" and its negation "-diff", and if none
is found by falling back to checking for the presence of NUL bytes in
the first few bytes of the file contents.

7-Zip, Windows' built-in ZIP functionality and Info-ZIP unzip without
the switch -a are not affected by the change and still extract text
files without doing any end-of-line conversions.

NB: The actual end-of-line style used in the archive entries doesn't
matter to unzip -a, as it converts any CR, CRLF and LF to the line end
characters appropriate for the platform it is running on.

Suggested-by: Ulrike Fischer <luatex@nililand.de>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
René Scharfe authored and Junio C Hamano committed Mar 5, 2015
1 parent 282616c commit 4aff646
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 2 deletions.
25 changes: 24 additions & 1 deletion archive-zip.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include "archive.h"
#include "streaming.h"
#include "utf8.h"
#include "userdiff.h"
#include "xdiff-interface.h"

static int zip_date;
static int zip_time;
Expand Down Expand Up @@ -189,6 +191,16 @@ static int has_only_ascii(const char *s)
}
}

/*
 * Decide whether an archive entry is to be treated as binary.
 *
 * The userdiff driver registered for `path` is consulted first; when no
 * driver matches the path, the driver named "default" is used instead.
 * If the driver's `binary` field holds anything other than -1, that
 * value is returned as the answer.  Otherwise the decision is delegated
 * to buffer_is_binary(), which inspects the `size` bytes at `buffer`.
 *
 * Returns the driver's `binary` setting or the result of
 * buffer_is_binary().
 */
static int entry_is_binary(const char *path, const void *buffer, size_t size)
{
	struct userdiff_driver *drv = userdiff_find_by_path(path);

	if (drv == NULL)
		drv = userdiff_find_by_name("default");

	/* -1 means the driver has no explicit setting: look at the content. */
	if (drv->binary == -1)
		return buffer_is_binary(buffer, size);

	return drv->binary;
}

#define STREAM_BUFFER_SIZE (1024 * 16)

static int write_zip_entry(struct archiver_args *args,
Expand All @@ -210,6 +222,8 @@ static int write_zip_entry(struct archiver_args *args,
struct git_istream *stream = NULL;
unsigned long flags = 0;
unsigned long size;
int is_binary = -1;
const char *path_without_prefix = path + args->baselen;

crc = crc32(0, NULL, 0);

Expand Down Expand Up @@ -256,6 +270,8 @@ static int write_zip_entry(struct archiver_args *args,
return error("cannot read %s",
sha1_to_hex(sha1));
crc = crc32(crc, buffer, size);
is_binary = entry_is_binary(path_without_prefix,
buffer, size);
out = buffer;
}
compressed_size = (method == 0) ? size : 0;
Expand Down Expand Up @@ -300,7 +316,6 @@ static int write_zip_entry(struct archiver_args *args,
copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE);
copy_le16(dirent.comment_length, 0);
copy_le16(dirent.disk, 0);
copy_le16(dirent.attr1, 0);
copy_le32(dirent.attr2, attr2);
copy_le32(dirent.offset, zip_offset);

Expand Down Expand Up @@ -328,6 +343,9 @@ static int write_zip_entry(struct archiver_args *args,
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
if (is_binary == -1)
is_binary = entry_is_binary(path_without_prefix,
buf, readlen);
write_or_die(1, buf, readlen);
}
close_istream(stream);
Expand Down Expand Up @@ -361,6 +379,9 @@ static int write_zip_entry(struct archiver_args *args,
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
if (is_binary == -1)
is_binary = entry_is_binary(path_without_prefix,
buf, readlen);

zstream.next_in = buf;
zstream.avail_in = readlen;
Expand Down Expand Up @@ -405,6 +426,8 @@ static int write_zip_entry(struct archiver_args *args,
free(deflated);
free(buffer);

copy_le16(dirent.attr1, !is_binary);

memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
zip_dir_offset += ZIP_DIR_HEADER_SIZE;
memcpy(zip_dir + zip_dir_offset, path, pathlen);
Expand Down
47 changes: 46 additions & 1 deletion t/t5003-archive-zip.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,37 @@ check_zip() {
test_expect_success UNZIP " validate file contents" "
diff -r a ${dir_with_prefix}a
"

dir=eol_$1
dir_with_prefix=$dir/$2
extracted=${dir_with_prefix}a
original=a

test_expect_success UNZIP " extract ZIP archive with EOL conversion" '
(mkdir $dir && cd $dir && "$GIT_UNZIP" -a ../$zipfile)
'

test_expect_success UNZIP " validate that text files are converted" "
test_cmp_bin $extracted/text.cr $extracted/text.crlf &&
test_cmp_bin $extracted/text.cr $extracted/text.lf
"

test_expect_success UNZIP " validate that binary files are unchanged" "
test_cmp_bin $original/binary.cr $extracted/binary.cr &&
test_cmp_bin $original/binary.crlf $extracted/binary.crlf &&
test_cmp_bin $original/binary.lf $extracted/binary.lf
"

test_expect_success UNZIP " validate that diff files are converted" "
test_cmp_bin $extracted/diff.cr $extracted/diff.crlf &&
test_cmp_bin $extracted/diff.cr $extracted/diff.lf
"

test_expect_success UNZIP " validate that -diff files are unchanged" "
test_cmp_bin $original/nodiff.cr $extracted/nodiff.cr &&
test_cmp_bin $original/nodiff.crlf $extracted/nodiff.crlf &&
test_cmp_bin $original/nodiff.lf $extracted/nodiff.lf
"
}

test_expect_success \
Expand All @@ -41,6 +72,18 @@ test_expect_success \
echo simple textfile >a/a &&
mkdir a/bin &&
cp /bin/sh a/bin &&
printf "text\r" >a/text.cr &&
printf "text\r\n" >a/text.crlf &&
printf "text\n" >a/text.lf &&
printf "text\r" >a/nodiff.cr &&
printf "text\r\n" >a/nodiff.crlf &&
printf "text\n" >a/nodiff.lf &&
printf "\0\r" >a/binary.cr &&
printf "\0\r\n" >a/binary.crlf &&
printf "\0\n" >a/binary.lf &&
printf "\0\r" >a/diff.cr &&
printf "\0\r\n" >a/diff.crlf &&
printf "\0\n" >a/diff.lf &&
printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 &&
printf "A not substituted O" >a/substfile2 &&
(p=long_path_to_a_file && cd a &&
Expand Down Expand Up @@ -70,7 +113,9 @@ test_expect_success \
git update-ref HEAD $(TZ=GMT GIT_COMMITTER_DATE="2005-05-27 22:00:00" \
git commit-tree $treeid </dev/null)'

test_expect_success 'setup export-subst' '
test_expect_success 'setup export-subst and diff attributes' '
echo "a/nodiff.* -diff" >>.git/info/attributes &&
echo "a/diff.* diff" >>.git/info/attributes &&
echo "substfile?" export-subst >>.git/info/attributes &&
git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \
>a/substfile1
Expand Down

0 comments on commit 4aff646

Please sign in to comment.