From 1f05d07c456e23c0827efbbb3e738afc9f3152e7 Mon Sep 17 00:00:00 2001 From: David Barr Date: Wed, 17 Nov 2010 23:03:51 -0600 Subject: [PATCH 01/31] vcs-svn: Allow simple v3 dumps (no deltas yet) Since the dumpfile version 1 days, the Subversion dump format gained some new fields: - a unique identifier for the repository (version 2 format) - whether the text and properties for a node should be interpreted as deltas - checksums for a delta's preimage - SHA-1 sums as alternatives to the existing MD5 checksums for copy source and the payload (delta). For now what is relevant to us is the Text-delta and Prop-delta fields, since not noticing these causes a dump file to be misinterpreted (see the previous commit). [jn: with tests] Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t9010-svn-fe.sh | 350 +++++++++++++++++++++++++++++++++++++++++++++- vcs-svn/svndump.c | 21 ++- 2 files changed, 365 insertions(+), 6 deletions(-) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index e9e46ea0f..be5372ab3 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -9,6 +9,30 @@ reinit_git () { git init } +properties () { + while test "$#" -ne 0 + do + property="$1" && + value="$2" && + printf "%s\n" "K ${#property}" && + printf "%s\n" "$property" && + printf "%s\n" "V ${#value}" && + printf "%s\n" "$value" && + shift 2 || + return 1 + done +} + +text_no_props () { + text="$1 +" && + printf "%s\n" "Prop-content-length: 10" && + printf "%s\n" "Text-content-length: ${#text}" && + printf "%s\n" "Content-length: $((${#text} + 10))" && + printf "%s\n" "" "PROPS-END" && + printf "%s\n" "$text" +} + >empty test_expect_success 'empty dump' ' @@ -18,13 +42,333 @@ test_expect_success 'empty dump' ' git fast-import input && - test_must_fail test-svn-fe input >stream && + echo "SVN-fs-dump-format-version: 4" >v4.dump && + test_must_fail test-svn-fe v4.dump >stream && test_cmp empty stream ' +test_expect_failure 'empty revision' ' + reinit_git && + printf "rev : %s\n" "" "" >expect && + cat >emptyrev.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 0 + Content-length: 0 + + Revision-number: 2 + Prop-content-length: 0 + Content-length: 0 + + EOF + test-svn-fe emptyrev.dump >stream && + git fast-import actual && + test_cmp expect actual +' + +test_expect_success 'empty properties' ' + reinit_git && + printf "rev : %s\n" "" "" >expect && + cat >emptyprop.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test-svn-fe emptyprop.dump >stream && + git fast-import actual && + test_cmp expect actual +' + +test_expect_success 'author name and commit message' ' + reinit_git && + echo "" >expect.author && + cat >message <<-\EOF && + A concise summary of the change + + A detailed description of the change, why it is needed, what + was broken and why applying this is the best course of action. + + * file.c + Details pertaining to an individual file. + EOF + { + properties \ + svn:author author@example.com \ + svn:log "$(cat message)" && + echo PROPS-END + } >props && + { + echo "SVN-fs-dump-format-version: 3" && + echo && + echo "Revision-number: 1" && + echo Prop-content-length: $(wc -c log.dump && + test-svn-fe log.dump >stream && + git fast-import actual.log && + git log --format="<%an, %ae>" >actual.author && + test_cmp message actual.log && + test_cmp expect.author actual.author +' + +test_expect_success 'unsupported properties are ignored' ' + reinit_git && + echo author >expect && + cat >extraprop.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 56 + Content-length: 56 + + K 8 + nonsense + V 1 + y + K 10 + svn:author + V 6 + author + PROPS-END + EOF + test-svn-fe extraprop.dump >stream && + git fast-import actual && + test_cmp expect actual +' + +test_expect_failure 'timestamp and empty file' ' + echo author@example.com >expect.author && + echo 1999-01-01 >expect.date && + echo file >expect.files && + reinit_git && + { + properties \ + svn:author author@example.com \ + svn:date "1999-01-01T00:01:002.000000Z" \ + svn:log "add empty file" && + echo PROPS-END + } >props && + { + cat <<-EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + EOF + echo Prop-content-length: $(wc -c emptyfile.dump && + test-svn-fe emptyfile.dump >stream && + git fast-import actual.author && + git log --date=short --format=%ad HEAD >actual.date && + git ls-tree -r --name-only HEAD >actual.files && + test_cmp expect.author actual.author && + test_cmp expect.date actual.date && + test_cmp expect.files actual.files && + git checkout HEAD empty-file && + test_cmp empty file +' + +test_expect_success 'directory with files' ' + reinit_git && + printf "%s\n" directory/file1 directory/file2 >expect.files && + echo hi >hi && + echo hello >hello && + { + properties \ + svn:author author@example.com \ + svn:date "1999-02-01T00:01:002.000000Z" \ + svn:log "add directory with some files in it" && + echo PROPS-END + } >props && + { + cat <<-EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + EOF + echo Prop-content-length: $(wc -c directory.dump && + test-svn-fe directory.dump >stream && + git fast-import actual.files && + git checkout HEAD directory && + test_cmp expect.files actual.files && + test_cmp hello directory/file1 && + test_cmp hi directory/file2 +' + +test_expect_success 'deltas not supported' ' + { + # (old) h + (inline) ello + (old) \n + printf "SVNQ%b%b%s" "Q\003\006\005\004" "\001Q\0204\001\002" "ello" | + q_to_nul + } >delta && + { + properties \ + svn:author author@example.com \ + svn:date "1999-01-05T00:01:002.000000Z" \ + svn:log "add greeting" && + echo PROPS-END + } >props && + { + properties \ + svn:author author@example.com \ + svn:date "1999-01-06T00:01:002.000000Z" \ + svn:log "change it" && + echo PROPS-END + } >props2 && + { + echo SVN-fs-dump-format-version: 3 && + echo && + echo Revision-number: 1 && + echo Prop-content-length: $(wc -c delta.dump && + test_must_fail test-svn-fe delta.dump +' + +test_expect_success 'property deltas not supported' ' + { + properties \ + svn:author author@example.com \ + svn:date "1999-03-06T00:01:002.000000Z" \ + svn:log "make an executable, or chmod -x it" && + echo PROPS-END + } >revprops && + { + echo SVN-fs-dump-format-version: 3 && + echo && + echo Revision-number: 1 && + echo Prop-content-length: $(wc -c propdelta.dump && + test_must_fail test-svn-fe propdelta.dump +' + test_expect_success 't9135/svn.dump' ' svnadmin create simple-svn && svnadmin load simple-svn <"$TEST_DIRECTORY/t9135/svn.dump" && diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index fa580e62d..6b64c1b85 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -42,6 +42,7 @@ static char* log_copy(uint32_t length, char *log) static struct { uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; + uint32_t text_delta, prop_delta; } node_ctx; static struct { @@ -58,7 +59,9 @@ static struct { uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid, revision_number, node_path, node_kind, node_action, node_copyfrom_path, node_copyfrom_rev, text_content_length, - prop_content_length, content_length, svn_fs_dump_format_version; + prop_content_length, content_length, svn_fs_dump_format_version, + /* version 3 format */ + text_delta, prop_delta; } keys; static void reset_node_ctx(char *fname) @@ -72,6 +75,8 @@ static void reset_node_ctx(char *fname) node_ctx.srcMode = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); node_ctx.mark = 0; + node_ctx.text_delta = 0; + node_ctx.prop_delta = 0; } static void reset_rev_ctx(uint32_t revision) @@ -107,6 +112,9 @@ static void init_keys(void) keys.prop_content_length = pool_intern("Prop-content-length"); keys.content_length = pool_intern("Content-length"); keys.svn_fs_dump_format_version = pool_intern("SVN-fs-dump-format-version"); + /* version 3 format (Subversion 1.1.0) */ + keys.text_delta = pool_intern("Text-delta"); + keys.prop_delta = pool_intern("Prop-delta"); } static void read_props(void) @@ -144,6 +152,9 @@ static void read_props(void) static void handle_node(void) { + if (node_ctx.text_delta || node_ctx.prop_delta) + die("text and property deltas not supported"); + if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) read_props(); @@ -210,8 +221,8 @@ void svndump_read(const char *url) if (key == keys.svn_fs_dump_format_version) { dump_ctx.version = atoi(val); - if (dump_ctx.version > 2) - die("expected svn dump format version <= 2, found %d", + if (dump_ctx.version > 3) + die("expected svn dump format version <= 3, found %d", dump_ctx.version); } else if (key == keys.uuid) { dump_ctx.uuid = pool_intern(val); @@ -255,6 +266,10 @@ void svndump_read(const char *url) node_ctx.textLength = atoi(val); } else if (key == keys.prop_content_length) { node_ctx.propLength = atoi(val); + } else if (key == keys.text_delta) { + node_ctx.text_delta = !strcmp(val, "true"); + } else if (key == keys.prop_delta) { + node_ctx.prop_delta = !strcmp(val, "true"); } else if (key == keys.content_length) { len = atoi(val); buffer_read_line(); From 5c28a8b054cb69a37638b0261fc370422c8fab58 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:46:06 -0600 Subject: [PATCH 02/31] vcs-svn: Check for errors from open() test-svn-fe segfaults when passed a bogus path. Simplify debugging by exiting with a meaningful error message instead. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- contrib/svn-fe/svn-fe.c | 3 ++- test-svn-fe.c | 3 ++- vcs-svn/svndump.c | 6 ++++-- vcs-svn/svndump.h | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/contrib/svn-fe/svn-fe.c b/contrib/svn-fe/svn-fe.c index a2677b03e..35db24f5e 100644 --- a/contrib/svn-fe/svn-fe.c +++ b/contrib/svn-fe/svn-fe.c @@ -8,7 +8,8 @@ int main(int argc, char **argv) { - svndump_init(NULL); + if (svndump_init(NULL)) + return 1; svndump_read((argc > 1) ? argv[1] : NULL); svndump_deinit(); svndump_reset(); diff --git a/test-svn-fe.c b/test-svn-fe.c index 77cf78abc..b42ba789b 100644 --- a/test-svn-fe.c +++ b/test-svn-fe.c @@ -9,7 +9,8 @@ int main(int argc, char *argv[]) { if (argc != 2) usage("test-svn-fe "); - svndump_init(argv[1]); + if (svndump_init(argv[1])) + return 1; svndump_read(NULL); svndump_deinit(); svndump_reset(); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 6b64c1b85..db1185122 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -290,14 +290,16 @@ void svndump_read(const char *url) handle_revision(); } -void svndump_init(const char *filename) +int svndump_init(const char *filename) { - buffer_init(filename); + if (buffer_init(filename)) + return error("cannot open %s: %s", filename, strerror(errno)); repo_init(); reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); init_keys(); + return 0; } void svndump_deinit(void) diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h index 93c412f14..df9ceb0e8 100644 --- a/vcs-svn/svndump.h +++ b/vcs-svn/svndump.h @@ -1,7 +1,7 @@ #ifndef SVNDUMP_H_ #define SVNDUMP_H_ -void svndump_init(const char *filename); +int svndump_init(const char *filename); void svndump_read(const char *url); void svndump_deinit(void); void svndump_reset(void); From 1d13e9f600986b7ced8db37a9a9c4967ee7ff9d5 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:46:22 -0600 Subject: [PATCH 03/31] vcs-svn: Eliminate node_ctx.srcRev global The srcRev variable is only used in handle_node(); its purpose is to hold the old mode for a path, to only be used if properties are not being changed. Narrow its scope to make its meaningful lifetime more obvious. No functional change intended. Add some tests as a sanity-check for the simplest case (no renames). Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t9010-svn-fe.sh | 158 ++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/svndump.c | 15 +++-- 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index be5372ab3..729e73ddd 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -252,6 +252,164 @@ test_expect_success 'directory with files' ' test_cmp hi directory/file2 ' +test_expect_failure 'change file mode but keep old content' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :120000 100644 OBJID OBJID T greeting + OBJID + :100644 120000 OBJID OBJID T greeting + OBJID + :000000 100644 OBJID OBJID A greeting + EOF + echo "link hello" >expect.blob && + echo hello >hello && + cat >filemode.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: add + Prop-content-length: 10 + Text-content-length: 11 + Content-length: 21 + + PROPS-END + link hello + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 33 + Content-length: 33 + + K 11 + svn:special + V 1 + * + PROPS-END + + Revision-number: 3 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test-svn-fe filemode.dump >stream && + git fast-import actual && + git show HEAD:greeting >actual.blob && + git show HEAD^:greeting >actual.target && + test_cmp expect actual && + test_cmp expect.blob actual.blob && + test_cmp hello actual.target +' + +test_expect_success 'change file mode and reiterate content' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :120000 100644 OBJID OBJID T greeting + OBJID + :100644 120000 OBJID OBJID T greeting + OBJID + :000000 100644 OBJID OBJID A greeting + EOF + echo "link hello" >expect.blob && + echo hello >hello && + cat >filemode.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: add + Prop-content-length: 10 + Text-content-length: 11 + Content-length: 21 + + PROPS-END + link hello + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 33 + Text-content-length: 11 + Content-length: 44 + + K 11 + svn:special + V 1 + * + PROPS-END + link hello + + Revision-number: 3 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: change + Prop-content-length: 10 + Text-content-length: 11 + Content-length: 21 + + PROPS-END + link hello + EOF + test-svn-fe filemode.dump >stream && + git fast-import actual && + git show HEAD:greeting >actual.blob && + git show HEAD^:greeting >actual.target && + test_cmp expect actual && + test_cmp expect.blob actual.blob && + test_cmp hello actual.target +' + test_expect_success 'deltas not supported' ' { # (old) h + (inline) ello + (old) \n diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index db1185122..65bd335aa 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -40,7 +40,7 @@ static char* log_copy(uint32_t length, char *log) } static struct { - uint32_t action, propLength, textLength, srcRev, srcMode, mark, type; + uint32_t action, propLength, textLength, srcRev, mark, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; uint32_t text_delta, prop_delta; } node_ctx; @@ -72,7 +72,6 @@ static void reset_node_ctx(char *fname) node_ctx.textLength = LENGTH_UNKNOWN; node_ctx.src[0] = ~0; node_ctx.srcRev = 0; - node_ctx.srcMode = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); node_ctx.mark = 0; node_ctx.text_delta = 0; @@ -152,6 +151,8 @@ static void read_props(void) static void handle_node(void) { + uint32_t old_mode = 0; + if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); @@ -159,7 +160,7 @@ static void handle_node(void) read_props(); if (node_ctx.srcRev) - node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + old_mode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); if (node_ctx.textLength != LENGTH_UNKNOWN && node_ctx.type != REPO_MODE_DIR) @@ -175,19 +176,19 @@ static void handle_node(void) else if (node_ctx.propLength != LENGTH_UNKNOWN) repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); else if (node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, node_ctx.mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, node_ctx.mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || node_ctx.textLength != LENGTH_UNKNOWN) repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); } - if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) - node_ctx.type = node_ctx.srcMode; + if (node_ctx.propLength == LENGTH_UNKNOWN && old_mode) + node_ctx.type = old_mode; if (node_ctx.mark) fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); From da3e217447390d52363989474a5e33bd298ff3ad Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:46:54 -0600 Subject: [PATCH 04/31] vcs-svn: Eliminate node_ctx.mark global The mark variable is only used in handle_node(). Its life is very short and simple: first, a new mark number is allocated if this node has text attached, then that mark is recorded in the in-core tree being built up, and lastly the mark is communicated to fast-import in the stream along with the associated text. A new reader may worry about interaction with other code, especially since mark is not initialized to zero in handle_node() itself. Disperse such worries by making it local. No functional change intended. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 65bd335aa..1fb7f82bb 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -40,7 +40,7 @@ static char* log_copy(uint32_t length, char *log) } static struct { - uint32_t action, propLength, textLength, srcRev, mark, type; + uint32_t action, propLength, textLength, srcRev, type; uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH]; uint32_t text_delta, prop_delta; } node_ctx; @@ -73,7 +73,6 @@ static void reset_node_ctx(char *fname) node_ctx.src[0] = ~0; node_ctx.srcRev = 0; pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname); - node_ctx.mark = 0; node_ctx.text_delta = 0; node_ctx.prop_delta = 0; } @@ -151,7 +150,7 @@ static void read_props(void) static void handle_node(void) { - uint32_t old_mode = 0; + uint32_t old_mode = 0, mark = 0; if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); @@ -164,7 +163,7 @@ static void handle_node(void) if (node_ctx.textLength != LENGTH_UNKNOWN && node_ctx.type != REPO_MODE_DIR) - node_ctx.mark = next_blob_mark(); + mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { repo_delete(node_ctx.dst); @@ -172,26 +171,26 @@ static void handle_node(void) node_ctx.action == NODEACT_REPLACE) { if (node_ctx.action == NODEACT_REPLACE && node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, node_ctx.mark); + repo_replace(node_ctx.dst, mark); else if (node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.textLength != LENGTH_UNKNOWN) - old_mode = repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) - repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark); + repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) - old_mode = repo_replace(node_ctx.dst, node_ctx.mark); + old_mode = repo_replace(node_ctx.dst, mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || node_ctx.textLength != LENGTH_UNKNOWN) - repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark); + repo_add(node_ctx.dst, node_ctx.type, mark); } if (node_ctx.propLength == LENGTH_UNKNOWN && old_mode) node_ctx.type = old_mode; - if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + if (mark) + fast_export_blob(node_ctx.type, mark, node_ctx.textLength); else if (node_ctx.textLength != LENGTH_UNKNOWN) buffer_skip_bytes(node_ctx.textLength); } From d6e81a03153810f122f1b8ec3635fd84c5429f69 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:47:41 -0600 Subject: [PATCH 05/31] vcs-svn: Unclutter handle_node by introducing have_props var It is possible for a path node in an SVN-format dump file to leave out the properties section. svn-fe handles this by carrying over the properties (in particular, file type) from the old version of that node. To support this, handle_node tests several times whether a Prop-content-length field is present. Ancient Subversion actually leaves out the Prop-content-length field even for nodes with properties, so that's not quite the right check. Besides, this detail of mechanism is distracting when the question at hand is instead what content the new node should have. So introduce a local have_props variable. The semantics are the same as before; the adaptations to support ancient streams that leave out the prop-content-length can wait until someone needs them. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 1fb7f82bb..45f0e477d 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -151,11 +151,12 @@ static void read_props(void) static void handle_node(void) { uint32_t old_mode = 0, mark = 0; + const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); - if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) + if (have_props && node_ctx.propLength) read_props(); if (node_ctx.srcRev) @@ -172,12 +173,12 @@ static void handle_node(void) if (node_ctx.action == NODEACT_REPLACE && node_ctx.type == REPO_MODE_DIR) repo_replace(node_ctx.dst, mark); - else if (node_ctx.propLength != LENGTH_UNKNOWN) + else if (have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.textLength != LENGTH_UNKNOWN) old_mode = repo_replace(node_ctx.dst, mark); } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && node_ctx.propLength != LENGTH_UNKNOWN) + if (node_ctx.srcRev && have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) old_mode = repo_replace(node_ctx.dst, mark); @@ -186,7 +187,7 @@ static void handle_node(void) repo_add(node_ctx.dst, node_ctx.type, mark); } - if (node_ctx.propLength == LENGTH_UNKNOWN && old_mode) + if (!have_props && old_mode) node_ctx.type = old_mode; if (mark) From 462e1f51a5648ce9d7ca26d44ed86327c454889a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:48:51 -0600 Subject: [PATCH 06/31] vcs-svn: Use mark to indicate nodes with included text Allocate a mark if needed as soon as possible so later code can use "if (mark)" to check if this node has text attached rather than explicitly checking for Text-content-length. While at it, reject directory nodes with text attached; the presence of such a node would indicate a bug in the dump generator or svn-fe's understanding. In the long term, it would be nice to be able to continue parsing and save the error for later, but for now it is simpler to error out right away. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 45f0e477d..844076b66 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -156,15 +156,17 @@ static void handle_node(void) if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); + if (node_ctx.textLength != LENGTH_UNKNOWN) + mark = next_blob_mark(); + if (have_props && node_ctx.propLength) read_props(); if (node_ctx.srcRev) old_mode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - if (node_ctx.textLength != LENGTH_UNKNOWN && - node_ctx.type != REPO_MODE_DIR) - mark = next_blob_mark(); + if (mark && node_ctx.type == REPO_MODE_DIR) + die("invalid dump: directories cannot have text attached"); if (node_ctx.action == NODEACT_DELETE) { repo_delete(node_ctx.dst); @@ -175,15 +177,15 @@ static void handle_node(void) repo_replace(node_ctx.dst, mark); else if (have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); - else if (node_ctx.textLength != LENGTH_UNKNOWN) + else if (mark) old_mode = repo_replace(node_ctx.dst, mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); - else if (node_ctx.srcRev && node_ctx.textLength != LENGTH_UNKNOWN) + else if (node_ctx.srcRev && mark) old_mode = repo_replace(node_ctx.dst, mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - node_ctx.textLength != LENGTH_UNKNOWN) + mark) repo_add(node_ctx.dst, node_ctx.type, mark); } @@ -192,8 +194,6 @@ static void handle_node(void) if (mark) fast_export_blob(node_ctx.type, mark, node_ctx.textLength); - else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); } static void handle_revision(void) From 5af8fae2df03d1888dbf315da29d1cdaa6214f57 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:49:17 -0600 Subject: [PATCH 07/31] vcs-svn: handle_node: Handle deletion case early Take care of "Node-action: delete" as soon as possible, so we can stop worrying about that case in the rest of the function. Functional change: catch deletion nodes with features that would not apply to them (text, properties, or origin data) and error out for those cases. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 844076b66..bc7002307 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -159,6 +159,13 @@ static void handle_node(void) if (node_ctx.textLength != LENGTH_UNKNOWN) mark = next_blob_mark(); + if (node_ctx.action == NODEACT_DELETE) { + if (mark || have_props || node_ctx.srcRev) + die("invalid dump: deletion node has " + "copyfrom info, text, or properties"); + return repo_delete(node_ctx.dst); + } + if (have_props && node_ctx.propLength) read_props(); @@ -168,9 +175,7 @@ static void handle_node(void) if (mark && node_ctx.type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_DELETE) { - repo_delete(node_ctx.dst); - } else if (node_ctx.action == NODEACT_CHANGE || + if (node_ctx.action == NODEACT_CHANGE || node_ctx.action == NODEACT_REPLACE) { if (node_ctx.action == NODEACT_REPLACE && node_ctx.type == REPO_MODE_DIR) From 6ee4a9be48ee714ddacf313a7073dabdd6c6ee11 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:49:55 -0600 Subject: [PATCH 08/31] vcs-svn: Replace = Delete + Add Simplify by reducing the "Node-action: replace" case to "Node-action: add". This way, the main part of handle_node() only has to deal with "add" and "change" nodes. Functional change: replacing a symlink or executable without setting properties will reset the mode. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index bc7002307..6a6aaf92b 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -166,6 +166,11 @@ static void handle_node(void) return repo_delete(node_ctx.dst); } + if (node_ctx.action == NODEACT_REPLACE) { + repo_delete(node_ctx.dst); + node_ctx.action = NODEACT_ADD; + } + if (have_props && node_ctx.propLength) read_props(); @@ -175,12 +180,8 @@ static void handle_node(void) if (mark && node_ctx.type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE || - node_ctx.action == NODEACT_REPLACE) { - if (node_ctx.action == NODEACT_REPLACE && - node_ctx.type == REPO_MODE_DIR) - repo_replace(node_ctx.dst, mark); - else if (have_props) + if (node_ctx.action == NODEACT_CHANGE) { + if (have_props) repo_modify(node_ctx.dst, node_ctx.type, mark); else if (mark) old_mode = repo_replace(node_ctx.dst, mark); From 08c39b5c44449cb649ac32274e27be8046e373d4 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:51:50 -0600 Subject: [PATCH 09/31] vcs-svn: Combine repo_replace and repo_modify functions There are two functions to change the staged content for a path in the svn importer's active commit: repo_replace, which changes the text and returns the mode, and repo_modify, which changes the text and mode and returns nothing. Worse, there are more subtle differences: - A mark of 0 passed to repo_modify means "use the existing content". repo_replace uses it as mark :0 and produces a corrupt stream. - When passed a path that is not part of the active commit, repo_replace returns without doing anything. repo_modify transparently adds a new directory entry. Get rid of both and introduce a new function with the best features of both: repo_modify_path modifies the mode, content, or both for a path, depending on which arguments are zero. If no such dirent already exists, it does nothing and reports the error by returning 0. Otherwise, the return value is the resulting mode. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/repo_tree.c | 21 +++++++-------------- vcs-svn/repo_tree.h | 3 +-- vcs-svn/svndump.c | 8 ++++---- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c index e94d91d12..7214ac8d0 100644 --- a/vcs-svn/repo_tree.c +++ b/vcs-svn/repo_tree.c @@ -175,25 +175,18 @@ void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark) repo_write_dirent(path, mode, blob_mark, 0); } -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark) +uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark) { - uint32_t mode = 0; struct repo_dirent *src_dent; src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL) { - mode = src_dent->mode; - repo_write_dirent(path, mode, blob_mark, 0); - } - return mode; -} - -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark) -{ - struct repo_dirent *src_dent; - src_dent = repo_read_dirent(active_commit, path); - if (src_dent != NULL && blob_mark == 0) + if (!src_dent) + return 0; + if (!blob_mark) blob_mark = src_dent->content_offset; + if (!mode) + mode = src_dent->mode; repo_write_dirent(path, mode, blob_mark, 0); + return mode; } void repo_delete(uint32_t *path) diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h index 547617592..68baeb582 100644 --- a/vcs-svn/repo_tree.h +++ b/vcs-svn/repo_tree.h @@ -14,8 +14,7 @@ uint32_t next_blob_mark(void); uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst); void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark); -uint32_t repo_replace(uint32_t *path, uint32_t blob_mark); -void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark); +uint32_t repo_modify_path(uint32_t *path, uint32_t mode, uint32_t blob_mark); void repo_delete(uint32_t *path); void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, long unsigned timestamp); diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 6a6aaf92b..e40be580a 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -182,14 +182,14 @@ static void handle_node(void) if (node_ctx.action == NODEACT_CHANGE) { if (have_props) - repo_modify(node_ctx.dst, node_ctx.type, mark); + repo_modify_path(node_ctx.dst, node_ctx.type, mark); else if (mark) - old_mode = repo_replace(node_ctx.dst, mark); + old_mode = repo_modify_path(node_ctx.dst, 0, mark); } else if (node_ctx.action == NODEACT_ADD) { if (node_ctx.srcRev && have_props) - repo_modify(node_ctx.dst, node_ctx.type, mark); + repo_modify_path(node_ctx.dst, node_ctx.type, mark); else if (node_ctx.srcRev && mark) - old_mode = repo_replace(node_ctx.dst, mark); + old_mode = repo_modify_path(node_ctx.dst, 0, mark); else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || mark) repo_add(node_ctx.dst, node_ctx.type, mark); From 1c7bb316169c700df0d1711555564f86c9cb9366 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:52:28 -0600 Subject: [PATCH 10/31] vcs-svn: Delay read of per-path properties The mode for each file in an svn-format dump is kept in the properties section. The properties section is read as soon as possible to allow the correct mode to be filled in when registering the file with the repo_tree lib. To support nodes with a missing properties section, svn-fe determines the mode in three stages: - The kind (directory or file) of the node is read from the dump and used to make an initial estimate (040000 or 100644). - Properties are read in and allowed to override this for symlinks and executables. - If there is no properties section, the mode from the previous content of the path is left alone, overriding the above considerations. This is a bit of a mess, and worse, it would get even more complicated once we start to support property deltas. If we could only register the file with a provisional value for mode and then change it later when properties say so, the procedure would be much simpler. ... oh, right, we can. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index e40be580a..4fdfcbbc0 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -150,7 +150,8 @@ static void read_props(void) static void handle_node(void) { - uint32_t old_mode = 0, mark = 0; + uint32_t mark = 0; + const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; if (node_ctx.text_delta || node_ctx.prop_delta) @@ -171,33 +172,28 @@ static void handle_node(void) node_ctx.action = NODEACT_ADD; } - if (have_props && node_ctx.propLength) - read_props(); - - if (node_ctx.srcRev) - old_mode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + if (node_ctx.srcRev) { + repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); + node_ctx.action = NODEACT_CHANGE; + } - if (mark && node_ctx.type == REPO_MODE_DIR) + if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) { - if (have_props) + if (node_ctx.action == NODEACT_CHANGE) + node_ctx.type = repo_modify_path(node_ctx.dst, 0, mark); + else /* Node-action: add */ + repo_add(node_ctx.dst, type, mark); + + if (have_props) { + const uint32_t old_mode = node_ctx.type; + node_ctx.type = type; + if (node_ctx.propLength) + read_props(); + if (node_ctx.type != old_mode) repo_modify_path(node_ctx.dst, node_ctx.type, mark); - else if (mark) - old_mode = repo_modify_path(node_ctx.dst, 0, mark); - } else if (node_ctx.action == NODEACT_ADD) { - if (node_ctx.srcRev && have_props) - repo_modify_path(node_ctx.dst, node_ctx.type, mark); - else if (node_ctx.srcRev && mark) - old_mode = repo_modify_path(node_ctx.dst, 0, mark); - else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) || - mark) - repo_add(node_ctx.dst, node_ctx.type, mark); } - if (!have_props && old_mode) - node_ctx.type = old_mode; - if (mark) fast_export_blob(node_ctx.type, mark, node_ctx.textLength); } From 414e569e453a49171b1f3db613f88378324104e8 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:52:59 -0600 Subject: [PATCH 11/31] vcs-svn: Reject path nodes without Node-action It would be better to flag such errors and let the import proceed anyway, but for now it is simpler not to worry about recovery from such weird cases. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t9010-svn-fe.sh | 20 ++++++++++++++++++++ vcs-svn/svndump.c | 7 +++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index 729e73ddd..cb9a23624 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -252,6 +252,26 @@ test_expect_success 'directory with files' ' test_cmp hi directory/file2 ' +test_expect_success 'node without action' ' + cat >inaction.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: directory + Node-kind: dir + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test_must_fail test-svn-fe inaction.dump +' + test_expect_failure 'change file mode but keep old content' ' reinit_git && cat >expect <<-\EOF && diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 4fdfcbbc0..0af8ac680 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -174,7 +174,8 @@ static void handle_node(void) if (node_ctx.srcRev) { repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); - node_ctx.action = NODEACT_CHANGE; + if (node_ctx.action == NODEACT_ADD) + node_ctx.action = NODEACT_CHANGE; } if (mark && type == REPO_MODE_DIR) @@ -182,8 +183,10 @@ static void handle_node(void) if (node_ctx.action == NODEACT_CHANGE) node_ctx.type = repo_modify_path(node_ctx.dst, 0, mark); - else /* Node-action: add */ + else if (node_ctx.action == NODEACT_ADD) repo_add(node_ctx.dst, type, mark); + else + die("invalid dump: Node-path block lacks Node-action"); if (have_props) { const uint32_t old_mode = node_ctx.type; From c7dbf35e91cffbc326078d0c0470662f6422150d Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:53:34 -0600 Subject: [PATCH 12/31] vcs-svn: More dump format sanity checks Node-action: change is not appropriate when switching between file and directory or adding a new file. Current svn-fe silently accepts such nodes and the resulting tree has missing files in the "changed when meant to add" case. Node-action: add requires some content (text or directory); there is no such thing as an "intent to add" node in svn dumps. Current svn-fe accepts such contentless adds but produces an invalid fast-import stream that refers to nonexistent mark :0 in response. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t9010-svn-fe.sh | 21 +++++++++++++++++++++ vcs-svn/svndump.c | 18 ++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index cb9a23624..f1e8799bb 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -272,6 +272,27 @@ test_expect_success 'node without action' ' test_must_fail test-svn-fe inaction.dump ' +test_expect_success 'action: add node without text' ' + cat >textless.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: textless + Node-kind: file + Node-action: add + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + EOF + test_must_fail test-svn-fe textless.dump +' + test_expect_failure 'change file mode but keep old content' ' reinit_git && cat >expect <<-\EOF && diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 0af8ac680..ab4ccfc55 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -181,12 +181,22 @@ static void handle_node(void) if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) - node_ctx.type = repo_modify_path(node_ctx.dst, 0, mark); - else if (node_ctx.action == NODEACT_ADD) + if (node_ctx.action == NODEACT_CHANGE) { + uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); + if (!mode) + die("invalid dump: path to be modified is missing"); + if (mode == REPO_MODE_DIR && type != REPO_MODE_DIR) + die("invalid dump: cannot modify a directory into a file"); + if (mode != REPO_MODE_DIR && type == REPO_MODE_DIR) + die("invalid dump: cannot modify a file into a directory"); + node_ctx.type = mode; + } else if (node_ctx.action == NODEACT_ADD) { + if (!mark && type != REPO_MODE_DIR) + die("invalid dump: adds node without text"); repo_add(node_ctx.dst, type, mark); - else + } else { die("invalid dump: Node-path block lacks Node-action"); + } if (have_props) { const uint32_t old_mode = node_ctx.type; From 3f3e676d6e6c1d445181107770670368e0ad3160 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:53:54 -0600 Subject: [PATCH 13/31] vcs-svn: Make source easier to read on small screens Remove some newlines from handle_node() that are not needed for clarity. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index ab4ccfc55..153b0c337 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -156,31 +156,25 @@ static void handle_node(void) if (node_ctx.text_delta || node_ctx.prop_delta) die("text and property deltas not supported"); - if (node_ctx.textLength != LENGTH_UNKNOWN) mark = next_blob_mark(); - if (node_ctx.action == NODEACT_DELETE) { if (mark || have_props || node_ctx.srcRev) die("invalid dump: deletion node has " "copyfrom info, text, or properties"); return repo_delete(node_ctx.dst); } - if (node_ctx.action == NODEACT_REPLACE) { repo_delete(node_ctx.dst); node_ctx.action = NODEACT_ADD; } - if (node_ctx.srcRev) { repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst); if (node_ctx.action == NODEACT_ADD) node_ctx.action = NODEACT_CHANGE; } - if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); if (!mode) @@ -197,7 +191,6 @@ static void handle_node(void) } else { die("invalid dump: Node-path block lacks Node-action"); } - if (have_props) { const uint32_t old_mode = node_ctx.type; node_ctx.type = type; @@ -206,7 +199,6 @@ static void handle_node(void) if (node_ctx.type != old_mode) repo_modify_path(node_ctx.dst, node_ctx.type, mark); } - if (mark) fast_export_blob(node_ctx.type, mark, node_ctx.textLength); } From 2a48afe1c256db6273a4ff99eaddc5c18dc46ffd Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:54:20 -0600 Subject: [PATCH 14/31] vcs-svn: Split off function for handling of individual properties The handle_property function is the part of read_props that would be interesting for most people: semantics of properties rather than the algorithm for parsing them. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 153b0c337..5de8dadcd 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -30,7 +30,7 @@ /* Create memory pool for log messages */ obj_pool_gen(log, char, 4096) -static char* log_copy(uint32_t length, char *log) +static char *log_copy(uint32_t length, const char *log) { char *buffer; log_free(log_pool.size); @@ -115,6 +115,23 @@ static void init_keys(void) keys.prop_delta = pool_intern("Prop-delta"); } +static void handle_property(uint32_t key, const char *val, uint32_t len) +{ + if (key == keys.svn_log) { + /* Value length excludes terminating nul. */ + rev_ctx.log = log_copy(len + 1, val); + } else if (key == keys.svn_author) { + rev_ctx.author = pool_intern(val); + } else if (key == keys.svn_date) { + if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) + fprintf(stderr, "Invalid timestamp: %s\n", val); + } else if (key == keys.svn_executable) { + node_ctx.type = REPO_MODE_EXE; + } else if (key == keys.svn_special) { + node_ctx.type = REPO_MODE_LNK; + } +} + static void read_props(void) { uint32_t len; @@ -129,19 +146,7 @@ static void read_props(void) } else if (!strncmp(t, "V ", 2)) { len = atoi(&t[2]); val = buffer_read_string(len); - if (key == keys.svn_log) { - /* Value length excludes terminating nul. */ - rev_ctx.log = log_copy(len + 1, val); - } else if (key == keys.svn_author) { - rev_ctx.author = pool_intern(val); - } else if (key == keys.svn_date) { - if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; - } + handle_property(key, val, len); key = ~0; buffer_read_line(); } From 6263c06d49abdf5e5defdf528c3ff67bf948ac9b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Fri, 19 Nov 2010 18:54:45 -0600 Subject: [PATCH 15/31] vcs-svn: Sharpen parsing of property lines Prepare to add a new type of property line (the 'D' line) to handle property deltas. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- vcs-svn/svndump.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 5de8dadcd..576d148e5 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -134,21 +134,29 @@ static void handle_property(uint32_t key, const char *val, uint32_t len) static void read_props(void) { - uint32_t len; uint32_t key = ~0; - char *val = NULL; - char *t; + const char *t; while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { - if (!strncmp(t, "K ", 2)) { - len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); - } else if (!strncmp(t, "V ", 2)) { - len = atoi(&t[2]); - val = buffer_read_string(len); + uint32_t len; + const char *val; + const char type = t[0]; + + if (!type || t[1] != ' ') + die("invalid property line: %s\n", t); + len = atoi(&t[2]); + val = buffer_read_string(len); + buffer_skip_bytes(1); /* Discard trailing newline. */ + + switch (type) { + case 'K': + key = pool_intern(val); + continue; + case 'V': handle_property(key, val, len); key = ~0; - buffer_read_line(); + continue; + default: + die("invalid property line: %s\n", t); } } } From 6b01b67658e2905b550739f1aee56a00911ca13c Mon Sep 17 00:00:00 2001 From: David Barr Date: Fri, 19 Nov 2010 18:57:46 -0600 Subject: [PATCH 16/31] vcs-svn: Implement Prop-delta handling The rules for what file is used as delta source for each file are not documented in dump-load-format.txt. Luckily, the Apache Software Foundation repository has rich enough examples to figure out most of the rules: Node-action: replace implies the empty property set and empty text as preimage for deltas. Otherwise, if a copyfrom source is given, that node is the preimage for deltas. Lastly, if none of the above applies and the node path exists in the current revision, then that version forms the basis. [jn: refactored, with tests] Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t9010-svn-fe.sh | 102 +++++++++++++++++++++++++++++++++++++++++++++- vcs-svn/svndump.c | 54 +++++++++++++++++++----- 2 files changed, 144 insertions(+), 12 deletions(-) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index f1e8799bb..7dc06707a 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -514,7 +514,12 @@ test_expect_success 'deltas not supported' ' test_must_fail test-svn-fe delta.dump ' -test_expect_success 'property deltas not supported' ' +test_expect_success 'property deltas supported' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :100755 100644 OBJID OBJID M script.sh + EOF { properties \ svn:author author@example.com \ @@ -565,7 +570,100 @@ test_expect_success 'property deltas not supported' ' PROPS-END EOF } >propdelta.dump && - test_must_fail test-svn-fe propdelta.dump + test-svn-fe propdelta.dump >stream && + git fast-import actual && + test_cmp expect actual +' + +test_expect_success 'deltas for typechange' ' + reinit_git && + cat >expect <<-\EOF && + OBJID + :120000 100644 OBJID OBJID T test-file + OBJID + :100755 120000 OBJID OBJID T test-file + OBJID + :000000 100755 OBJID OBJID A test-file + EOF + cat >deleteprop.dump <<-\EOF && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: test-file + Node-kind: file + Node-action: add + Prop-delta: true + Prop-content-length: 35 + Text-content-length: 17 + Content-length: 52 + + K 14 + svn:executable + V 0 + + PROPS-END + link testing 123 + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: test-file + Node-kind: file + Node-action: change + Prop-delta: true + Prop-content-length: 53 + Text-content-length: 17 + Content-length: 70 + + K 11 + svn:special + V 1 + * + D 14 + svn:executable + PROPS-END + link testing 231 + + Revision-number: 3 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: test-file + Node-kind: file + Node-action: change + Prop-delta: true + Prop-content-length: 27 + Text-content-length: 17 + Content-length: 44 + + D 11 + svn:special + PROPS-END + link testing 321 + EOF + test-svn-fe deleteprop.dump >stream && + git fast-import actual && + test_cmp expect actual ' test_expect_success 't9135/svn.dump' ' diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 576d148e5..c71a57599 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -115,20 +115,35 @@ static void init_keys(void) keys.prop_delta = pool_intern("Prop-delta"); } -static void handle_property(uint32_t key, const char *val, uint32_t len) +static void handle_property(uint32_t key, const char *val, uint32_t len, + uint32_t *type_set) { if (key == keys.svn_log) { + if (!val) + die("invalid dump: unsets svn:log"); /* Value length excludes terminating nul. */ rev_ctx.log = log_copy(len + 1, val); } else if (key == keys.svn_author) { rev_ctx.author = pool_intern(val); } else if (key == keys.svn_date) { + if (!val) + die("invalid dump: unsets svn:date"); if (parse_date_basic(val, &rev_ctx.timestamp, NULL)) - fprintf(stderr, "Invalid timestamp: %s\n", val); - } else if (key == keys.svn_executable) { - node_ctx.type = REPO_MODE_EXE; - } else if (key == keys.svn_special) { - node_ctx.type = REPO_MODE_LNK; + warning("invalid timestamp: %s", val); + } else if (key == keys.svn_executable || key == keys.svn_special) { + if (*type_set) { + if (!val) + return; + die("invalid dump: sets type twice"); + } + if (!val) { + node_ctx.type = REPO_MODE_BLB; + return; + } + *type_set = 1; + node_ctx.type = key == keys.svn_executable ? + REPO_MODE_EXE : + REPO_MODE_LNK; } } @@ -136,6 +151,19 @@ static void read_props(void) { uint32_t key = ~0; const char *t; + /* + * NEEDSWORK: to support simple mode changes like + * K 11 + * svn:special + * V 1 + * * + * D 14 + * svn:executable + * we keep track of whether a mode has been set and reset to + * plain file only if not. We should be keeping track of the + * symlink and executable bits separately instead. + */ + uint32_t type_set = 0; while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { uint32_t len; const char *val; @@ -151,8 +179,13 @@ static void read_props(void) case 'K': key = pool_intern(val); continue; + case 'D': + key = pool_intern(val); + val = NULL; + len = 0; + /* fall through */ case 'V': - handle_property(key, val, len); + handle_property(key, val, len, &type_set); key = ~0; continue; default: @@ -167,8 +200,8 @@ static void handle_node(void) const uint32_t type = node_ctx.type; const int have_props = node_ctx.propLength != LENGTH_UNKNOWN; - if (node_ctx.text_delta || node_ctx.prop_delta) - die("text and property deltas not supported"); + if (node_ctx.text_delta) + die("text deltas not supported"); if (node_ctx.textLength != LENGTH_UNKNOWN) mark = next_blob_mark(); if (node_ctx.action == NODEACT_DELETE) { @@ -206,7 +239,8 @@ static void handle_node(void) } if (have_props) { const uint32_t old_mode = node_ctx.type; - node_ctx.type = type; + if (!node_ctx.prop_delta) + node_ctx.type = type; if (node_ctx.propLength) read_props(); if (node_ctx.type != old_mode) From 9e8c532108b9078812f23c53a2df3509e7ce71bf Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 6 Dec 2010 16:19:32 -0600 Subject: [PATCH 17/31] vcs-svn: Allow change nodes for root of tree (/) It is not uncommon for a svn repository to include change records for properties at the top level of the tracked tree: Node-path: Node-kind: dir Node-action: change Prop-delta: true Prop-content-length: 43 Content-length: 43 K 10 svn:ignore V 11 build-area PROPS-END Unfortunately a recent svn-fe change (vcs-svn: More dump format sanity checks, 2010-11-19) causes such nodes to be rejected with the error message fatal: invalid dump: path to be modified is missing The repo_tree module does not keep a dirent for the root of the tree. Add a block to the dump parser to take care of this case. Signed-off-by: Jonathan Nieder Signed-off-by: Junio C Hamano --- t/t9010-svn-fe.sh | 55 +++++++++++++++++++++++++++++++++++++++++++++++ vcs-svn/svndump.c | 5 ++++- 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/t/t9010-svn-fe.sh b/t/t9010-svn-fe.sh index 7dc06707a..87945073b 100755 --- a/t/t9010-svn-fe.sh +++ b/t/t9010-svn-fe.sh @@ -580,6 +580,61 @@ test_expect_success 'property deltas supported' ' test_cmp expect actual ' +test_expect_success 'properties on /' ' + reinit_git && + cat <<-\EOF >expect && + OBJID + OBJID + :000000 100644 OBJID OBJID A greeting + EOF + sed -e "s/X$//" <<-\EOF >changeroot.dump && + SVN-fs-dump-format-version: 3 + + Revision-number: 1 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: greeting + Node-kind: file + Node-action: add + Text-content-length: 0 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Revision-number: 2 + Prop-content-length: 10 + Content-length: 10 + + PROPS-END + + Node-path: X + Node-kind: dir + Node-action: change + Prop-delta: true + Prop-content-length: 43 + Content-length: 43 + + K 10 + svn:ignore + V 11 + build-area + + PROPS-END + EOF + test-svn-fe changeroot.dump >stream && + git fast-import actual && + test_cmp expect actual +' + test_expect_success 'deltas for typechange' ' reinit_git && cat >expect <<-\EOF && diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index c71a57599..1669d0fa5 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -221,7 +221,10 @@ static void handle_node(void) } if (mark && type == REPO_MODE_DIR) die("invalid dump: directories cannot have text attached"); - if (node_ctx.action == NODEACT_CHANGE) { + if (node_ctx.action == NODEACT_CHANGE && !~*node_ctx.dst) { + if (type != REPO_MODE_DIR) + die("invalid dump: root of tree is not a regular file"); + } else if (node_ctx.action == NODEACT_CHANGE) { uint32_t mode = repo_modify_path(node_ctx.dst, 0, mark); if (!mode) die("invalid dump: path to be modified is missing"); From 8dc6a373d201839859fe7924b63e57526ee2fc22 Mon Sep 17 00:00:00 2001 From: David Barr Date: Thu, 2 Dec 2010 21:40:20 +1100 Subject: [PATCH 18/31] fast-import: add 'ls' command Lazy fast-import frontend authors that want to rely on the backend to keep track of the content of the imported trees _almost_ have what they need in the 'cat-blob' command (v1.7.4-rc0~30^2~3, 2010-11-28). But it is not quite enough, since (1) cat-blob can be used to retrieve the content of files, but not their mode, and (2) using cat-blob requires the frontend to keep track of a name (mark number or object id) for each blob to be retrieved Introduce an 'ls' command to complement cat-blob and take care of the remaining needs. The 'ls' command finds what is at a given path within a given tree-ish (tag, commit, or tree): 'ls' SP SP LF or in fast-import's active commit: 'ls' SP LF The response is a single line sent through the cat-blob channel, imitating ls-tree output. So for example: FE> ls :1 Documentation gfi> 040000 tree 9e6c2b599341d28a2a375f8207507e0a2a627fe9 Documentation FE> ls 9e6c2b599341d28a2a375f8207507e0a2a627fe9 git-fast-import.txt gfi> 100644 blob 4f92954396e3f0f97e75b6838a5635b583708870 git-fast-import.txt FE> ls :1 RelNotes gfi> 120000 blob b942e499449d97aeb50c73ca2bdc1c6e6d528743 RelNotes FE> cat-blob b942e499449d97aeb50c73ca2bdc1c6e6d528743 gfi> b942e499449d97aeb50c73ca2bdc1c6e6d528743 blob 32 gfi> Documentation/RelNotes/1.7.4.txt The most interesting parts of the reply are the first word, which is a 6-digit octal mode (regular file, executable, symlink, directory, or submodule), and the part from the second space to the tab, which is a that can be used in later cat-blob, ls, and filemodify (M) commands to refer to the content (blob, tree, or commit) at that path. If there is nothing there, the response is "missing some/path". The intent is for this command to be used to read files from the active commit, so a frontend can apply patches to them, and to copy files and directories from previous revisions. For example, proposed updates to svn-fe use this command in place of its internal representation of the repository directory structure. This simplifies the frontend a great deal and means support for resuming an import in a separate fast-import run (i.e., incremental import) is basically free. Signed-off-by: David Barr Signed-off-by: Jonathan Nieder Improved-by: Junio C Hamano Improved-by: Sverre Rabbelier --- Documentation/git-fast-import.txt | 63 +++++++++++- fast-import.c | 162 +++++++++++++++++++++++++++++- t/t9300-fast-import.sh | 92 +++++++++++++++-- 3 files changed, 303 insertions(+), 14 deletions(-) diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt index c3a2766b2..e1b7a0f9e 100644 --- a/Documentation/git-fast-import.txt +++ b/Documentation/git-fast-import.txt @@ -196,7 +196,8 @@ especially when a higher level language such as Perl, Python or Ruby is being used. fast-import is very strict about its input. Where we say SP below we mean -*exactly* one space. Likewise LF means one (and only one) linefeed. +*exactly* one space. Likewise LF means one (and only one) linefeed +and HT one (and only one) horizontal tab. Supplying additional whitespace characters will cause unexpected results, such as branch names or file names with leading or trailing spaces in their name, or early termination of fast-import when it encounters @@ -334,6 +335,11 @@ and control the current import process. More detailed discussion format to the file descriptor set with `--cat-blob-fd` or `stdout` if unspecified. +`ls`:: + Causes fast-import to print a line describing a directory + entry in 'ls-tree' format to the file descriptor set with + `--cat-blob-fd` or `stdout` if unspecified. + `feature`:: Require that fast-import supports the specified feature, or abort if it does not. @@ -919,6 +925,55 @@ This command can be used anywhere in the stream that comments are accepted. In particular, the `cat-blob` command can be used in the middle of a commit but not in the middle of a `data` command. +`ls` +~~~~ +Prints information about the object at a path to a file descriptor +previously arranged with the `--cat-blob-fd` argument. This allows +printing a blob from the active commit (with `cat-blob`) or copying a +blob or tree from a previous commit for use in the current one (with +`filemodify`). + +The `ls` command can be used anywhere in the stream that comments are +accepted, including the middle of a commit. + +Reading from the active commit:: + This form can only be used in the middle of a `commit`. + The path names a directory entry within fast-import's + active commit. The path must be quoted in this case. ++ +.... + 'ls' SP LF +.... + +Reading from a named tree:: + The `` can be a mark reference (`:`) or the + full 40-byte SHA-1 of a Git tag, commit, or tree object, + preexisting or waiting to be written. + The path is relative to the top level of the tree + named by ``. ++ +.... + 'ls' SP SP LF +.... + +See `filemodify` above for a detailed description of ``. + +Output uses the same format as `git ls-tree {litdd} `: + +==== + SP ('blob' | 'tree' | 'commit') SP HT LF +==== + +The represents the blob, tree, or commit object at +and can be used in later 'cat-blob', 'filemodify', or 'ls' commands. + +If there is no file or subtree at that path, 'git fast-import' will +instead report + +==== + missing SP LF +==== + `feature` ~~~~~~~~~ Require that fast-import supports the specified feature, or abort if @@ -946,8 +1001,10 @@ import-marks:: any "feature import-marks" command in the stream. cat-blob:: - Ignored. Versions of fast-import not supporting the - "cat-blob" command will exit with a message indicating so. +ls:: + Require that the backend support the 'cat-blob' or 'ls' command. + Versions of fast-import not supporting the specified command + will exit with a message indicating so. This lets the import error out early with a clear message, rather than wasting time on the early part of an import before the unsupported command is detected. diff --git a/fast-import.c b/fast-import.c index 3886a1b46..6c37b8400 100644 --- a/fast-import.c +++ b/fast-import.c @@ -24,10 +24,12 @@ Format of STDIN stream: commit_msg ('from' sp committish lf)? ('merge' sp committish lf)* - file_change* + (file_change | ls)* lf?; commit_msg ::= data; + ls ::= 'ls' sp '"' quoted(path) '"' lf; + file_change ::= file_clr | file_del | file_rnm @@ -132,7 +134,7 @@ Format of STDIN stream: ts ::= # time since the epoch in seconds, ascii base10 notation; tz ::= # GIT style timezone; - # note: comments and cat requests may appear anywhere + # note: comments, ls and cat requests may appear anywhere # in the input, except within a data command. Any form # of the data command always escapes the related input # from comment processing. @@ -141,7 +143,9 @@ Format of STDIN stream: # must be the first character on that line (an lf # preceded it). # + cat_blob ::= 'cat-blob' sp (hexsha1 | idnum) lf; + ls_tree ::= 'ls' sp (hexsha1 | idnum) sp path_str lf; comment ::= '#' not_lf* lf; not_lf ::= # Any byte that is not ASCII newline (LF); @@ -374,6 +378,7 @@ static int cat_blob_fd = STDOUT_FILENO; static void parse_argv(void); static void parse_cat_blob(void); +static void parse_ls(struct branch *b); static void write_branch_report(FILE *rpt, struct branch *b) { @@ -2614,6 +2619,8 @@ static void parse_new_commit(void) note_change_n(b, prev_fanout); else if (!strcmp("deleteall", command_buf.buf)) file_change_deleteall(b); + else if (!prefixcmp(command_buf.buf, "ls ")) + parse_ls(b); else { unread_command_buf = 1; break; @@ -2837,6 +2844,153 @@ static void parse_cat_blob(void) cat_blob(oe, sha1); } +static struct object_entry *dereference(struct object_entry *oe, + unsigned char sha1[20]) +{ + unsigned long size; + void *buf = NULL; + if (!oe) { + enum object_type type = sha1_object_info(sha1, NULL); + if (type < 0) + die("object not found: %s", sha1_to_hex(sha1)); + /* cache it! */ + oe = insert_object(sha1); + oe->type = type; + oe->pack_id = MAX_PACK_ID; + oe->idx.offset = 1; + } + switch (oe->type) { + case OBJ_TREE: /* easy case. */ + return oe; + case OBJ_COMMIT: + case OBJ_TAG: + break; + default: + die("Not a treeish: %s", command_buf.buf); + } + + if (oe->pack_id != MAX_PACK_ID) { /* in a pack being written */ + buf = gfi_unpack_entry(oe, &size); + } else { + enum object_type unused; + buf = read_sha1_file(sha1, &unused, &size); + } + if (!buf) + die("Can't load object %s", sha1_to_hex(sha1)); + + /* Peel one layer. */ + switch (oe->type) { + case OBJ_TAG: + if (size < 40 + strlen("object ") || + get_sha1_hex(buf + strlen("object "), sha1)) + die("Invalid SHA1 in tag: %s", command_buf.buf); + break; + case OBJ_COMMIT: + if (size < 40 + strlen("tree ") || + get_sha1_hex(buf + strlen("tree "), sha1)) + die("Invalid SHA1 in commit: %s", command_buf.buf); + } + + free(buf); + return find_object(sha1); +} + +static struct object_entry *parse_treeish_dataref(const char **p) +{ + unsigned char sha1[20]; + struct object_entry *e; + + if (**p == ':') { /* */ + char *endptr; + e = find_mark(strtoumax(*p + 1, &endptr, 10)); + if (endptr == *p + 1) + die("Invalid mark: %s", command_buf.buf); + if (!e) + die("Unknown mark: %s", command_buf.buf); + *p = endptr; + hashcpy(sha1, e->idx.sha1); + } else { /* */ + if (get_sha1_hex(*p, sha1)) + die("Invalid SHA1: %s", command_buf.buf); + e = find_object(sha1); + *p += 40; + } + + while (!e || e->type != OBJ_TREE) + e = dereference(e, sha1); + return e; +} + +static void print_ls(int mode, const unsigned char *sha1, const char *path) +{ + static struct strbuf line = STRBUF_INIT; + + /* See show_tree(). */ + const char *type = + S_ISGITLINK(mode) ? commit_type : + S_ISDIR(mode) ? tree_type : + blob_type; + + if (!mode) { + /* missing SP path LF */ + strbuf_reset(&line); + strbuf_addstr(&line, "missing "); + quote_c_style(path, &line, NULL, 0); + strbuf_addch(&line, '\n'); + } else { + /* mode SP type SP object_name TAB path LF */ + strbuf_reset(&line); + strbuf_addf(&line, "%06o %s %s\t", + mode, type, sha1_to_hex(sha1)); + quote_c_style(path, &line, NULL, 0); + strbuf_addch(&line, '\n'); + } + cat_blob_write(line.buf, line.len); +} + +static void parse_ls(struct branch *b) +{ + const char *p; + struct tree_entry *root = NULL; + struct tree_entry leaf = {0}; + + /* ls SP ( SP)? */ + p = command_buf.buf + strlen("ls "); + if (*p == '"') { + if (!b) + die("Not in a commit: %s", command_buf.buf); + root = &b->branch_tree; + } else { + struct object_entry *e = parse_treeish_dataref(&p); + root = new_tree_entry(); + hashcpy(root->versions[1].sha1, e->idx.sha1); + load_tree(root); + if (*p++ != ' ') + die("Missing space after tree-ish: %s", command_buf.buf); + } + if (*p == '"') { + static struct strbuf uq = STRBUF_INIT; + const char *endp; + strbuf_reset(&uq); + if (unquote_c_style(&uq, p, &endp)) + die("Invalid path: %s", command_buf.buf); + if (*endp) + die("Garbage after path in: %s", command_buf.buf); + p = uq.buf; + } + tree_content_get(root, p, &leaf); + /* + * A directory in preparation would have a sha1 of zero + * until it is saved. Save, for simplicity. + */ + if (S_ISDIR(leaf.versions[1].mode)) + store_tree(&leaf); + + print_ls(leaf.versions[1].mode, leaf.versions[1].sha1, p); + if (!b || root != &b->branch_tree) + release_tree_entry(root); +} + static void checkpoint(void) { checkpoint_requested = 0; @@ -3001,7 +3155,7 @@ static int parse_one_feature(const char *feature, int from_stream) relative_marks_paths = 0; } else if (!prefixcmp(feature, "force")) { force_update = 1; - } else if (!strcmp(feature, "notes")) { + } else if (!strcmp(feature, "notes") || !strcmp(feature, "ls")) { ; /* do nothing; we have the feature */ } else { return 0; @@ -3142,6 +3296,8 @@ int main(int argc, const char **argv) while (read_next_command() != EOF) { if (!strcmp("blob", command_buf.buf)) parse_new_blob(); + else if (!prefixcmp(command_buf.buf, "ls ")) + parse_ls(NULL); else if (!prefixcmp(command_buf.buf, "commit ")) parse_new_commit(); else if (!prefixcmp(command_buf.buf, "tag ")) diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh index 52ac0e56d..6b1ba6c85 100755 --- a/t/t9300-fast-import.sh +++ b/t/t9300-fast-import.sh @@ -42,6 +42,14 @@ echo "$@"' >empty +test_expect_success 'setup: have pipes?' ' + rm -f frob && + if mkfifo frob + then + test_set_prereq PIPE + fi +' + ### ### series A ### @@ -898,6 +906,77 @@ test_expect_success \ git diff-tree -C --find-copies-harder -r N4^ N4 >actual && compare_diff_raw expect actual' +test_expect_success PIPE 'N: read and copy directory' ' + cat >expect <<-\EOF + :100755 100755 f1fb5da718392694d0076d677d6d0e364c79b0bc f1fb5da718392694d0076d677d6d0e364c79b0bc C100 file2/newf file3/newf + :100644 100644 7123f7f44e39be127c5eb701e5968176ee9d78b1 7123f7f44e39be127c5eb701e5968176ee9d78b1 C100 file2/oldf file3/oldf + EOF + git update-ref -d refs/heads/N4 && + rm -f backflow && + mkfifo backflow && + ( + exec $GIT_COMMITTER_DATE + data <backflow && + git diff-tree -C --find-copies-harder -r N4^ N4 >actual && + compare_diff_raw expect actual +' + +test_expect_success PIPE 'N: empty directory reads as missing' ' + cat <<-\EOF >expect && + OBJNAME + :000000 100644 OBJNAME OBJNAME A unrelated + EOF + echo "missing src" >expect.response && + git update-ref -d refs/heads/read-empty && + rm -f backflow && + mkfifo backflow && + ( + exec $GIT_COMMITTER_DATE + data <response && + cat <<-\EOF + D dst1 + D dst2 + EOF + ) | + git fast-import --cat-blob-fd=3 3>backflow && + test_cmp expect.response response && + git rev-list read-empty | + git diff-tree -r --root --stdin | + sed "s/$_x40/OBJNAME/g" >actual && + test_cmp expect actual +' + test_expect_success \ 'N: copy root directory by tree hash' \ 'cat >expect <<-\EOF && @@ -1861,6 +1940,11 @@ test_expect_success 'R: feature no-relative-marks should be honoured' ' test_cmp marks.new non-relative.out ' +test_expect_success 'R: feature ls supported' ' + echo "feature ls" | + git fast-import +' + test_expect_success 'R: feature cat-blob supported' ' echo "feature cat-blob" | git fast-import @@ -1986,14 +2070,6 @@ test_expect_success 'R: print two blobs to stdout' ' test_cmp expect actual ' -test_expect_success 'setup: have pipes?' ' - rm -f frob && - if mkfifo frob - then - test_set_prereq PIPE - fi -' - test_expect_success PIPE 'R: copy using cat-file' ' expect_id=$(git hash-object big) && expect_len=$(wc -c Date: Sun, 10 Oct 2010 21:37:10 -0500 Subject: [PATCH 19/31] vcs-svn: eliminate global byte_buffer The data stored in byte_buffer[] is always either discarded or written to stdout immediately. No need for it to persist between function calls. Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 154356709..f22c94f02 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -14,7 +14,6 @@ obj_pool_gen(blob, char, 4096) static char line_buffer[LINE_BUFFER_LEN]; -static char byte_buffer[COPY_BUFFER_LEN]; static FILE *infile; int buffer_init(const char *filename) @@ -68,6 +67,7 @@ char *buffer_read_string(uint32_t len) void buffer_copy_bytes(uint32_t len) { + char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; while (len > 0 && !feof(infile) && !ferror(infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; @@ -83,6 +83,7 @@ void buffer_copy_bytes(uint32_t len) void buffer_skip_bytes(uint32_t len) { + char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; while (len > 0 && !feof(infile) && !ferror(infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; From deadcef4c15d54d0a397180a1783ae8939254188 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 6 Nov 2010 12:01:28 -0500 Subject: [PATCH 20/31] vcs-svn: replace buffer_read_string memory pool with a strbuf obj_pool is inherently global and does not use the standard growing factor alloc_nr, which makes it feel out of place in the git codebase. Plus it is overkill for this application: all that is needed is a buffer that can grow between requests to accomodate larger strings. Use a strbuf instead. As a side effect, this improves the error handling: allocation failures will result in a clean exit instead of segfaults. It would be nice to add a test case (using ulimit or failmalloc) but that can wait for another day. Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index f22c94f02..6f32f28e5 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -5,15 +5,13 @@ #include "git-compat-util.h" #include "line_buffer.h" -#include "obj_pool.h" +#include "strbuf.h" #define LINE_BUFFER_LEN 10000 #define COPY_BUFFER_LEN 4096 -/* Create memory pool for char sequence of known length */ -obj_pool_gen(blob, char, 4096) - static char line_buffer[LINE_BUFFER_LEN]; +static struct strbuf blob_buffer = STRBUF_INIT; static FILE *infile; int buffer_init(const char *filename) @@ -58,11 +56,9 @@ char *buffer_read_line(void) char *buffer_read_string(uint32_t len) { - char *s; - blob_free(blob_pool.size); - s = blob_pointer(blob_alloc(len + 1)); - s[fread(s, 1, len, infile)] = '\0'; - return ferror(infile) ? NULL : s; + strbuf_reset(&blob_buffer); + strbuf_fread(&blob_buffer, len, infile); + return ferror(infile) ? NULL : blob_buffer.buf; } void buffer_copy_bytes(uint32_t len) @@ -94,5 +90,5 @@ void buffer_skip_bytes(uint32_t len) void buffer_reset(void) { - blob_reset(); + strbuf_release(&blob_buffer); } From d350822fa7d14052713bea0ec62ff1246d8a2f7a Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:39:21 -0500 Subject: [PATCH 21/31] vcs-svn: collect line_buffer data in a struct Prepare for the line_buffer lib to support input from multiple files, by collecting global state in a struct that can be easily passed around. No API change yet. Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 45 +++++++++++++++++++++---------------------- vcs-svn/line_buffer.h | 11 +++++++++++ 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 6f32f28e5..e7bc230fc 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -7,17 +7,16 @@ #include "line_buffer.h" #include "strbuf.h" -#define LINE_BUFFER_LEN 10000 #define COPY_BUFFER_LEN 4096 - -static char line_buffer[LINE_BUFFER_LEN]; -static struct strbuf blob_buffer = STRBUF_INIT; -static FILE *infile; +static struct line_buffer buf_ = LINE_BUFFER_INIT; +static struct line_buffer *buf; int buffer_init(const char *filename) { - infile = filename ? fopen(filename, "r") : stdin; - if (!infile) + buf = &buf_; + + buf->infile = filename ? fopen(filename, "r") : stdin; + if (!buf->infile) return -1; return 0; } @@ -25,10 +24,10 @@ int buffer_init(const char *filename) int buffer_deinit(void) { int err; - if (infile == stdin) - return ferror(infile); - err = ferror(infile); - err |= fclose(infile); + if (buf->infile == stdin) + return ferror(buf->infile); + err = ferror(buf->infile); + err |= fclose(buf->infile); return err; } @@ -36,13 +35,13 @@ int buffer_deinit(void) char *buffer_read_line(void) { char *end; - if (!fgets(line_buffer, sizeof(line_buffer), infile)) + if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) /* Error or data exhausted. */ return NULL; - end = line_buffer + strlen(line_buffer); + end = buf->line_buffer + strlen(buf->line_buffer); if (end[-1] == '\n') end[-1] = '\0'; - else if (feof(infile)) + else if (feof(buf->infile)) ; /* No newline at end of file. That's fine. */ else /* @@ -51,23 +50,23 @@ char *buffer_read_line(void) * but for now let's return an error. */ return NULL; - return line_buffer; + return buf->line_buffer; } char *buffer_read_string(uint32_t len) { - strbuf_reset(&blob_buffer); - strbuf_fread(&blob_buffer, len, infile); - return ferror(infile) ? NULL : blob_buffer.buf; + strbuf_reset(&buf->blob_buffer); + strbuf_fread(&buf->blob_buffer, len, buf->infile); + return ferror(buf->infile) ? NULL : buf->blob_buffer.buf; } void buffer_copy_bytes(uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { + while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); + in = fread(byte_buffer, 1, in, buf->infile); len -= in; fwrite(byte_buffer, 1, in, stdout); if (ferror(stdout)) { @@ -81,14 +80,14 @@ void buffer_skip_bytes(uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; - while (len > 0 && !feof(infile) && !ferror(infile)) { + while (len > 0 && !feof(buf->infile) && !ferror(buf->infile)) { in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN; - in = fread(byte_buffer, 1, in, infile); + in = fread(byte_buffer, 1, in, buf->infile); len -= in; } } void buffer_reset(void) { - strbuf_release(&blob_buffer); + strbuf_release(&buf->blob_buffer); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 9c78ae11a..4ae1133a9 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -1,6 +1,17 @@ #ifndef LINE_BUFFER_H_ #define LINE_BUFFER_H_ +#include "strbuf.h" + +#define LINE_BUFFER_LEN 10000 + +struct line_buffer { + char line_buffer[LINE_BUFFER_LEN]; + struct strbuf blob_buffer; + FILE *infile; +}; +#define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} + int buffer_init(const char *filename); int buffer_deinit(void); char *buffer_read_line(void); From e5e45ca1e35482d120a7ce776cf208369edcc459 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 10 Oct 2010 21:41:06 -0500 Subject: [PATCH 22/31] vcs-svn: teach line_buffer to handle multiple input files Collect the line_buffer state in a newly public line_buffer struct. Callers can use multiple line_buffers to manage input from multiple files at a time. svn-fe's delta applier will use this to stream a delta from svnrdump and the preimage it applies to from fast-import at the same time. The tests don't take advantage of the new features, but I think that's okay. It is easier to find lingering examples of nonreentrant code by searching for "static" in line_buffer.c. Signed-off-by: Jonathan Nieder --- test-line-buffer.c | 17 +++++++++-------- vcs-svn/fast_export.c | 6 +++--- vcs-svn/fast_export.h | 5 ++++- vcs-svn/line_buffer.c | 20 ++++++++------------ vcs-svn/line_buffer.h | 14 +++++++------- vcs-svn/line_buffer.txt | 5 +++-- vcs-svn/svndump.c | 29 ++++++++++++++++------------- 7 files changed, 50 insertions(+), 46 deletions(-) diff --git a/test-line-buffer.c b/test-line-buffer.c index c11bf7f96..f9af892d2 100644 --- a/test-line-buffer.c +++ b/test-line-buffer.c @@ -22,25 +22,26 @@ static uint32_t strtouint32(const char *s) int main(int argc, char *argv[]) { + struct line_buffer buf = LINE_BUFFER_INIT; char *s; if (argc != 1) usage("test-line-buffer < input.txt"); - if (buffer_init(NULL)) + if (buffer_init(&buf, NULL)) die_errno("open error"); - while ((s = buffer_read_line())) { - s = buffer_read_string(strtouint32(s)); + while ((s = buffer_read_line(&buf))) { + s = buffer_read_string(&buf, strtouint32(s)); fputs(s, stdout); fputc('\n', stdout); - buffer_skip_bytes(1); - if (!(s = buffer_read_line())) + buffer_skip_bytes(&buf, 1); + if (!(s = buffer_read_line(&buf))) break; - buffer_copy_bytes(strtouint32(s) + 1); + buffer_copy_bytes(&buf, strtouint32(s) + 1); } - if (buffer_deinit()) + if (buffer_deinit(&buf)) die("input error"); if (ferror(stdout)) die("output error"); - buffer_reset(); + buffer_reset(&buf); return 0; } diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c index 6cfa256a3..260cf50e7 100644 --- a/vcs-svn/fast_export.c +++ b/vcs-svn/fast_export.c @@ -63,14 +63,14 @@ void fast_export_commit(uint32_t revision, uint32_t author, char *log, printf("progress Imported commit %"PRIu32".\n\n", revision); } -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len) +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, struct line_buffer *input) { if (mode == REPO_MODE_LNK) { /* svn symlink blobs start with "link " */ - buffer_skip_bytes(5); + buffer_skip_bytes(input, 5); len -= 5; } printf("blob\nmark :%"PRIu32"\ndata %"PRIu32"\n", mark, len); - buffer_copy_bytes(len); + buffer_copy_bytes(input, len); fputc('\n', stdout); } diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h index 2aaaea53d..054e7d5eb 100644 --- a/vcs-svn/fast_export.h +++ b/vcs-svn/fast_export.h @@ -1,11 +1,14 @@ #ifndef FAST_EXPORT_H_ #define FAST_EXPORT_H_ +#include "line_buffer.h" + void fast_export_delete(uint32_t depth, uint32_t *path); void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode, uint32_t mark); void fast_export_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid, uint32_t url, unsigned long timestamp); -void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len); +void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len, + struct line_buffer *input); #endif diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index e7bc230fc..806932b32 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -8,20 +8,16 @@ #include "strbuf.h" #define COPY_BUFFER_LEN 4096 -static struct line_buffer buf_ = LINE_BUFFER_INIT; -static struct line_buffer *buf; -int buffer_init(const char *filename) +int buffer_init(struct line_buffer *buf, const char *filename) { - buf = &buf_; - buf->infile = filename ? fopen(filename, "r") : stdin; if (!buf->infile) return -1; return 0; } -int buffer_deinit(void) +int buffer_deinit(struct line_buffer *buf) { int err; if (buf->infile == stdin) @@ -32,7 +28,7 @@ int buffer_deinit(void) } /* Read a line without trailing newline. */ -char *buffer_read_line(void) +char *buffer_read_line(struct line_buffer *buf) { char *end; if (!fgets(buf->line_buffer, sizeof(buf->line_buffer), buf->infile)) @@ -53,14 +49,14 @@ char *buffer_read_line(void) return buf->line_buffer; } -char *buffer_read_string(uint32_t len) +char *buffer_read_string(struct line_buffer *buf, uint32_t len) { strbuf_reset(&buf->blob_buffer); strbuf_fread(&buf->blob_buffer, len, buf->infile); return ferror(buf->infile) ? NULL : buf->blob_buffer.buf; } -void buffer_copy_bytes(uint32_t len) +void buffer_copy_bytes(struct line_buffer *buf, uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; @@ -70,13 +66,13 @@ void buffer_copy_bytes(uint32_t len) len -= in; fwrite(byte_buffer, 1, in, stdout); if (ferror(stdout)) { - buffer_skip_bytes(len); + buffer_skip_bytes(buf, len); return; } } } -void buffer_skip_bytes(uint32_t len) +void buffer_skip_bytes(struct line_buffer *buf, uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; uint32_t in; @@ -87,7 +83,7 @@ void buffer_skip_bytes(uint32_t len) } } -void buffer_reset(void) +void buffer_reset(struct line_buffer *buf) { strbuf_release(&buf->blob_buffer); } diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 4ae1133a9..fb373903d 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -12,12 +12,12 @@ struct line_buffer { }; #define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} -int buffer_init(const char *filename); -int buffer_deinit(void); -char *buffer_read_line(void); -char *buffer_read_string(uint32_t len); -void buffer_copy_bytes(uint32_t len); -void buffer_skip_bytes(uint32_t len); -void buffer_reset(void); +int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_deinit(struct line_buffer *buf); +char *buffer_read_line(struct line_buffer *buf); +char *buffer_read_string(struct line_buffer *buf, uint32_t len); +void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); +void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); +void buffer_reset(struct line_buffer *buf); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 8906fb1f5..f8eaa4dd8 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -14,14 +14,15 @@ Calling sequence The calling program: + - initializes a `struct line_buffer` to LINE_BUFFER_INIT - specifies a file to read with `buffer_init` - processes input with `buffer_read_line`, `buffer_read_string`, `buffer_skip_bytes`, and `buffer_copy_bytes` - closes the file with `buffer_deinit`, perhaps to start over and read another file. -Before exiting, the caller can use `buffer_reset` to deallocate -resources for the benefit of profiling tools. +When finished, the caller can use `buffer_reset` to deallocate +resources. Functions --------- diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c index 2ad2c307d..4195da9cf 100644 --- a/vcs-svn/svndump.c +++ b/vcs-svn/svndump.c @@ -30,6 +30,8 @@ /* Create memory pool for log messages */ obj_pool_gen(log, char, 4096) +static struct line_buffer input = LINE_BUFFER_INIT; + static char* log_copy(uint32_t length, char *log) { char *buffer; @@ -115,14 +117,14 @@ static void read_props(void) uint32_t key = ~0; char *val = NULL; char *t; - while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) { + while ((t = buffer_read_line(&input)) && strcmp(t, "PROPS-END")) { if (!strncmp(t, "K ", 2)) { len = atoi(&t[2]); - key = pool_intern(buffer_read_string(len)); - buffer_read_line(); + key = pool_intern(buffer_read_string(&input, len)); + buffer_read_line(&input); } else if (!strncmp(t, "V ", 2)) { len = atoi(&t[2]); - val = buffer_read_string(len); + val = buffer_read_string(&input, len); if (key == keys.svn_log) { /* Value length excludes terminating nul. */ rev_ctx.log = log_copy(len + 1, val); @@ -137,7 +139,7 @@ static void read_props(void) node_ctx.type = REPO_MODE_LNK; } key = ~0; - buffer_read_line(); + buffer_read_line(&input); } } } @@ -179,9 +181,10 @@ static void handle_node(void) node_ctx.type = node_ctx.srcMode; if (node_ctx.mark) - fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength); + fast_export_blob(node_ctx.type, + node_ctx.mark, node_ctx.textLength, &input); else if (node_ctx.textLength != LENGTH_UNKNOWN) - buffer_skip_bytes(node_ctx.textLength); + buffer_skip_bytes(&input, node_ctx.textLength); } static void handle_revision(void) @@ -200,7 +203,7 @@ void svndump_read(const char *url) uint32_t key; reset_dump_ctx(pool_intern(url)); - while ((t = buffer_read_line())) { + while ((t = buffer_read_line(&input))) { val = strstr(t, ": "); if (!val) continue; @@ -257,7 +260,7 @@ void svndump_read(const char *url) node_ctx.propLength = atoi(val); } else if (key == keys.content_length) { len = atoi(val); - buffer_read_line(); + buffer_read_line(&input); if (active_ctx == REV_CTX) { read_props(); } else if (active_ctx == NODE_CTX) { @@ -265,7 +268,7 @@ void svndump_read(const char *url) active_ctx = REV_CTX; } else { fprintf(stderr, "Unexpected content length header: %"PRIu32"\n", len); - buffer_skip_bytes(len); + buffer_skip_bytes(&input, len); } } } @@ -277,7 +280,7 @@ void svndump_read(const char *url) void svndump_init(const char *filename) { - buffer_init(filename); + buffer_init(&input, filename); repo_init(); reset_dump_ctx(~0); reset_rev_ctx(0); @@ -292,7 +295,7 @@ void svndump_deinit(void) reset_dump_ctx(~0); reset_rev_ctx(0); reset_node_ctx(NULL); - if (buffer_deinit()) + if (buffer_deinit(&input)) fprintf(stderr, "Input error\n"); if (ferror(stdout)) fprintf(stderr, "Output error\n"); @@ -301,7 +304,7 @@ void svndump_deinit(void) void svndump_reset(void) { log_reset(); - buffer_reset(); + buffer_reset(&input); repo_reset(); reset_dump_ctx(~0); reset_rev_ctx(0); From 850c5ea44ce0b4aac3be7c4d14b38ec901e777d1 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 18:50:16 -0600 Subject: [PATCH 23/31] vcs-svn: make test-line-buffer input format more flexible Imitate the input format of test-obj-pool to support arbitrary sequences of commands rather than alternating read/copy. This should make it easier to add tests that exercise other line_buffer functions. Signed-off-by: Jonathan Nieder --- t/t0080-vcs-svn.sh | 18 +++++++-------- test-line-buffer.c | 56 +++++++++++++++++++++++++++++----------------- 2 files changed, 45 insertions(+), 29 deletions(-) diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh index d3225ada6..8be97002a 100755 --- a/t/t0080-vcs-svn.sh +++ b/t/t0080-vcs-svn.sh @@ -85,40 +85,40 @@ test_expect_success 'line buffer' ' printf "%s\n" "" foo >expected6 && test-line-buffer <<-\EOF >actual1 && - 5 + read 5 HELLO EOF test-line-buffer <<-\EOF >actual2 && - 0 + read 0 - 5 + copy 5 HELLO EOF q_to_nul <<-\EOF | - 1 + read 1 Q EOF test-line-buffer >actual3 && q_to_nul <<-\EOF | - 0 + read 0 - 1 + copy 1 Q EOF test-line-buffer >actual4 && test-line-buffer <<-\EOF >actual5 && - 5 + read 5 foo EOF test-line-buffer <<-\EOF >actual6 && - 0 + read 0 - 5 + copy 5 foo EOF diff --git a/test-line-buffer.c b/test-line-buffer.c index f9af892d2..383f35bba 100644 --- a/test-line-buffer.c +++ b/test-line-buffer.c @@ -1,11 +1,5 @@ /* * test-line-buffer.c: code to exercise the svn importer's input helper - * - * Input format: - * number NL - * (number bytes) NL - * number NL - * ... */ #include "git-compat-util.h" @@ -20,28 +14,50 @@ static uint32_t strtouint32(const char *s) return (uint32_t) n; } +static void handle_command(const char *command, const char *arg, struct line_buffer *buf) +{ + switch (*command) { + case 'c': + if (!prefixcmp(command, "copy ")) { + buffer_copy_bytes(buf, strtouint32(arg) + 1); + return; + } + case 'r': + if (!prefixcmp(command, "read ")) { + const char *s = buffer_read_string(buf, strtouint32(arg)); + printf("%s\n", s); + buffer_skip_bytes(buf, 1); /* consume newline */ + return; + } + default: + die("unrecognized command: %s", command); + } +} + +static void handle_line(const char *line, struct line_buffer *stdin_buf) +{ + const char *arg = strchr(line, ' '); + if (!arg) + die("no argument in line: %s", line); + handle_command(line, arg + 1, stdin_buf); +} + int main(int argc, char *argv[]) { - struct line_buffer buf = LINE_BUFFER_INIT; + struct line_buffer stdin_buf = LINE_BUFFER_INIT; char *s; if (argc != 1) - usage("test-line-buffer < input.txt"); - if (buffer_init(&buf, NULL)) + usage("test-line-buffer < script"); + + if (buffer_init(&stdin_buf, NULL)) die_errno("open error"); - while ((s = buffer_read_line(&buf))) { - s = buffer_read_string(&buf, strtouint32(s)); - fputs(s, stdout); - fputc('\n', stdout); - buffer_skip_bytes(&buf, 1); - if (!(s = buffer_read_line(&buf))) - break; - buffer_copy_bytes(&buf, strtouint32(s) + 1); - } - if (buffer_deinit(&buf)) + while ((s = buffer_read_line(&stdin_buf))) + handle_line(s, &stdin_buf); + if (buffer_deinit(&stdin_buf)) die("input error"); if (ferror(stdout)) die("output error"); - buffer_reset(&buf); + buffer_reset(&stdin_buf); return 0; } From 232087fd99915abaa7d917fd181ad8477bb689f2 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 18:51:07 -0600 Subject: [PATCH 24/31] tests: give vcs-svn/line_buffer its own test script Split the line_buffer test into small pieces and move it to its own file as preparation for adding more tests. Signed-off-by: Jonathan Nieder --- t/t0080-vcs-svn.sh | 54 ---------------------------------- t/t0081-line-buffer.sh | 67 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 54 deletions(-) create mode 100755 t/t0081-line-buffer.sh diff --git a/t/t0080-vcs-svn.sh b/t/t0080-vcs-svn.sh index 8be97002a..99a314b08 100755 --- a/t/t0080-vcs-svn.sh +++ b/t/t0080-vcs-svn.sh @@ -76,60 +76,6 @@ test_expect_success 'obj pool: high-water mark' ' test_cmp expected actual ' -test_expect_success 'line buffer' ' - echo HELLO >expected1 && - printf "%s\n" "" HELLO >expected2 && - echo >expected3 && - printf "%s\n" "" Q | q_to_nul >expected4 && - printf "%s\n" foo "" >expected5 && - printf "%s\n" "" foo >expected6 && - - test-line-buffer <<-\EOF >actual1 && - read 5 - HELLO - EOF - - test-line-buffer <<-\EOF >actual2 && - read 0 - - copy 5 - HELLO - EOF - - q_to_nul <<-\EOF | - read 1 - Q - EOF - test-line-buffer >actual3 && - - q_to_nul <<-\EOF | - read 0 - - copy 1 - Q - EOF - test-line-buffer >actual4 && - - test-line-buffer <<-\EOF >actual5 && - read 5 - foo - EOF - - test-line-buffer <<-\EOF >actual6 && - read 0 - - copy 5 - foo - EOF - - test_cmp expected1 actual1 && - test_cmp expected2 actual2 && - test_cmp expected3 actual3 && - test_cmp expected4 actual4 && - test_cmp expected5 actual5 && - test_cmp expected6 actual6 -' - test_expect_success 'string pool' ' echo a does not equal b >expected.differ && echo a equals a >expected.match && diff --git a/t/t0081-line-buffer.sh b/t/t0081-line-buffer.sh new file mode 100755 index 000000000..13ac735b5 --- /dev/null +++ b/t/t0081-line-buffer.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +test_description="Test the svn importer's input handling routines. +" +. ./test-lib.sh + +test_expect_success 'read greeting' ' + echo HELLO >expect && + test-line-buffer <<-\EOF >actual && + read 5 + HELLO + EOF + test_cmp expect actual +' + +test_expect_success '0-length read, send along greeting' ' + printf "%s\n" "" HELLO >expect && + test-line-buffer <<-\EOF >actual && + read 0 + + copy 5 + HELLO + EOF + test_cmp expect actual +' + +test_expect_success 'buffer_read_string copes with trailing null byte' ' + echo >expect && + q_to_nul <<-\EOF | test-line-buffer >actual && + read 1 + Q + EOF + test_cmp expect actual +' + +test_expect_success '0-length read, copy null byte' ' + printf "%s\n" "" Q | q_to_nul >expect && + q_to_nul <<-\EOF | test-line-buffer >actual && + read 0 + + copy 1 + Q + EOF + test_cmp expect actual +' + +test_expect_success 'long reads are truncated' ' + printf "%s\n" foo "" >expect && + test-line-buffer <<-\EOF >actual && + read 5 + foo + EOF + test_cmp expect actual +' + +test_expect_success 'long copies are truncated' ' + printf "%s\n" "" foo >expect && + test-line-buffer <<-\EOF >actual && + read 0 + + copy 5 + foo + EOF + test_cmp expect actual +' + +test_done From 7b990c90514b24097ee71edbc02cb3a497a9476b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 18:52:28 -0600 Subject: [PATCH 25/31] vcs-svn: tweak test-line-buffer to not assume line-oriented input Do not expect an implicit newline after each input record. Use a separate command to exercise buffer_skip_bytes. Signed-off-by: Jonathan Nieder --- t/t0081-line-buffer.sh | 27 +++++++++++++-------------- test-line-buffer.c | 10 +++++++--- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/t/t0081-line-buffer.sh b/t/t0081-line-buffer.sh index 13ac735b5..68d616399 100755 --- a/t/t0081-line-buffer.sh +++ b/t/t0081-line-buffer.sh @@ -7,45 +7,44 @@ test_description="Test the svn importer's input handling routines. test_expect_success 'read greeting' ' echo HELLO >expect && test-line-buffer <<-\EOF >actual && - read 5 + read 6 HELLO EOF test_cmp expect actual ' test_expect_success '0-length read, send along greeting' ' - printf "%s\n" "" HELLO >expect && + echo HELLO >expect && test-line-buffer <<-\EOF >actual && read 0 - - copy 5 + copy 6 HELLO EOF test_cmp expect actual ' -test_expect_success 'buffer_read_string copes with trailing null byte' ' - echo >expect && +test_expect_success 'buffer_read_string copes with null byte' ' + >expect && q_to_nul <<-\EOF | test-line-buffer >actual && - read 1 + read 2 Q EOF test_cmp expect actual ' -test_expect_success '0-length read, copy null byte' ' - printf "%s\n" "" Q | q_to_nul >expect && +test_expect_success 'skip, copy null byte' ' + echo Q | q_to_nul >expect && q_to_nul <<-\EOF | test-line-buffer >actual && - read 0 - - copy 1 + skip 2 + Q + copy 2 Q EOF test_cmp expect actual ' test_expect_success 'long reads are truncated' ' - printf "%s\n" foo "" >expect && + echo foo >expect && test-line-buffer <<-\EOF >actual && read 5 foo @@ -56,7 +55,7 @@ test_expect_success 'long reads are truncated' ' test_expect_success 'long copies are truncated' ' printf "%s\n" "" foo >expect && test-line-buffer <<-\EOF >actual && - read 0 + read 1 copy 5 foo diff --git a/test-line-buffer.c b/test-line-buffer.c index 383f35bba..da0bc6502 100644 --- a/test-line-buffer.c +++ b/test-line-buffer.c @@ -19,14 +19,18 @@ static void handle_command(const char *command, const char *arg, struct line_buf switch (*command) { case 'c': if (!prefixcmp(command, "copy ")) { - buffer_copy_bytes(buf, strtouint32(arg) + 1); + buffer_copy_bytes(buf, strtouint32(arg)); return; } case 'r': if (!prefixcmp(command, "read ")) { const char *s = buffer_read_string(buf, strtouint32(arg)); - printf("%s\n", s); - buffer_skip_bytes(buf, 1); /* consume newline */ + fputs(s, stdout); + return; + } + case 's': + if (!prefixcmp(command, "skip ")) { + buffer_skip_bytes(buf, strtouint32(arg)); return; } default: From d280f68313eecb8b3838c70641a246382d5e5343 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 19:07:16 -0600 Subject: [PATCH 26/31] t0081 (line-buffer): add buffering tests POSIX makes the behavior of read(2) from a pipe fairly clear: a read from an empty pipe will block until there is data available and any other read will not block, prefering to return a partial result. Likewise, fread(3) and fgets(3) are clearly specified to act as though implemented by calling fgetc(3) in a simple loop. But the buffering behavior of fgetc is less clear. Luckily, no sane platform is going to implement fgetc by calling the equivalent of read(2) more than once. fgetc has to be able to return without filling its buffer to preserve errno when errors are encountered anyway. So let's assume the simpler behavior (trust) but add some tests to catch insane platforms that violate that when they come (verify). First check that fread can handle a 0-length read from an empty fifo. Because open(O_RDONLY) blocks until the writing end is open, open the writing end of the fifo in advance in a subshell. Next try short inputs from a pipe that is not filled all the way. Lastly (two tests) try very large inputs from a pipe that will not fit in the relevant buffers. The first of these tests reads a little more than 8192 bytes, which is BUFSIZ (the size of stdio's buffers) on this Linux machine. The second reads a little over 64 KiB (the pipe capacity on Linux) and is not run unless requested by setting the GIT_REMOTE_SVN_TEST_BIG_FILES environment variable. Signed-off-by: Jonathan Nieder --- t/t0081-line-buffer.sh | 110 ++++++++++++++++++++++++++++++++++++++++- test-line-buffer.c | 22 +++++++-- 2 files changed, 128 insertions(+), 4 deletions(-) diff --git a/t/t0081-line-buffer.sh b/t/t0081-line-buffer.sh index 68d616399..33a728ed9 100755 --- a/t/t0081-line-buffer.sh +++ b/t/t0081-line-buffer.sh @@ -1,10 +1,76 @@ #!/bin/sh test_description="Test the svn importer's input handling routines. + +These tests exercise the line_buffer library, but their real purpose +is to check the assumptions that library makes of the platform's input +routines. Processes engaged in bi-directional communication would +hang if fread or fgets is too greedy. + +While at it, check that input of newlines and null bytes are handled +correctly. " . ./test-lib.sh -test_expect_success 'read greeting' ' +test -n "$GIT_REMOTE_SVN_TEST_BIG_FILES" && test_set_prereq EXPENSIVE + +generate_tens_of_lines () { + tens=$1 && + line=$2 && + + i=0 && + while test $i -lt "$tens" + do + for j in a b c d e f g h i j + do + echo "$line" + done && + : $((i = $i + 1)) || + return + done +} + +long_read_test () { + : each line is 10 bytes, including newline && + line=abcdefghi && + echo "$line" >expect && + + if ! test_declared_prereq PIPE + then + echo >&4 "long_read_test: need to declare PIPE prerequisite" + return 127 + fi && + tens_of_lines=$(($1 / 100 + 1)) && + lines=$(($tens_of_lines * 10)) && + readsize=$((($lines - 1) * 10 + 3)) && + copysize=7 && + rm -f input && + mkfifo input && + { + { + generate_tens_of_lines $tens_of_lines "$line" && + sleep 100 + } >input & + } && + test-line-buffer input <<-EOF >output && + read $readsize + copy $copysize + EOF + kill $! && + test_line_count = $lines output && + tail -n 1 actual && + test_cmp expect actual +} + +test_expect_success 'setup: have pipes?' ' + rm -f frob && + if mkfifo frob + then + test_set_prereq PIPE + fi +' + +test_expect_success 'hello world' ' echo HELLO >expect && test-line-buffer <<-\EOF >actual && read 6 @@ -13,6 +79,21 @@ test_expect_success 'read greeting' ' test_cmp expect actual ' +test_expect_success PIPE '0-length read, no input available' ' + >expect && + rm -f input && + mkfifo input && + { + sleep 100 >input & + } && + test-line-buffer input <<-\EOF >actual && + read 0 + copy 0 + EOF + kill $! && + test_cmp expect actual +' + test_expect_success '0-length read, send along greeting' ' echo HELLO >expect && test-line-buffer <<-\EOF >actual && @@ -23,6 +104,33 @@ test_expect_success '0-length read, send along greeting' ' test_cmp expect actual ' +test_expect_success PIPE '1-byte read, no input available' ' + printf "%s" ab >expect && + rm -f input && + mkfifo input && + { + { + printf "%s" a && + printf "%s" b && + sleep 100 + } >input & + } && + test-line-buffer input <<-\EOF >actual && + read 1 + copy 1 + EOF + kill $! && + test_cmp expect actual +' + +test_expect_success PIPE 'long read (around 8192 bytes)' ' + long_read_test 8192 +' + +test_expect_success PIPE,EXPENSIVE 'longer read (around 65536 bytes)' ' + long_read_test 65536 +' + test_expect_success 'buffer_read_string copes with null byte' ' >expect && q_to_nul <<-\EOF | test-line-buffer >actual && diff --git a/test-line-buffer.c b/test-line-buffer.c index da0bc6502..ec19b13ee 100644 --- a/test-line-buffer.c +++ b/test-line-buffer.c @@ -49,15 +49,31 @@ static void handle_line(const char *line, struct line_buffer *stdin_buf) int main(int argc, char *argv[]) { struct line_buffer stdin_buf = LINE_BUFFER_INIT; + struct line_buffer file_buf = LINE_BUFFER_INIT; + struct line_buffer *input = &stdin_buf; + const char *filename; char *s; - if (argc != 1) - usage("test-line-buffer < script"); + if (argc == 1) + filename = NULL; + else if (argc == 2) + filename = argv[1]; + else + usage("test-line-buffer [file] < script"); if (buffer_init(&stdin_buf, NULL)) die_errno("open error"); + if (filename) { + if (buffer_init(&file_buf, filename)) + die_errno("error opening %s", filename); + input = &file_buf; + } + while ((s = buffer_read_line(&stdin_buf))) - handle_line(s, &stdin_buf); + handle_line(s, input); + + if (filename && buffer_deinit(&file_buf)) + die("error reading from %s", filename); if (buffer_deinit(&stdin_buf)) die("input error"); if (ferror(stdout)) From e832f43c1d26bf70611d98b62d95870a99292add Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:05:46 -0600 Subject: [PATCH 27/31] vcs-svn: add binary-safe read function buffer_read_string works well for non line-oriented input except for one problem: it does not tell the caller how many bytes were actually written. This means that unless one is very careful about checking for errors (and eof) the calling program cannot tell the difference between the string "foo" followed by an early end of file and the string "foo\0bar\0baz". So introduce a variant that reports the length, too, a thinner wrapper around strbuf_fread. Its result is written to a strbuf so the caller does not need to keep track of the number of bytes read. Signed-off-by: Jonathan Nieder --- t/t0081-line-buffer.sh | 18 ++++++++++++++++++ test-line-buffer.c | 10 ++++++++++ vcs-svn/line_buffer.c | 6 ++++++ vcs-svn/line_buffer.h | 1 + 4 files changed, 35 insertions(+) diff --git a/t/t0081-line-buffer.sh b/t/t0081-line-buffer.sh index 33a728ed9..a8eeb2064 100755 --- a/t/t0081-line-buffer.sh +++ b/t/t0081-line-buffer.sh @@ -151,6 +151,15 @@ test_expect_success 'skip, copy null byte' ' test_cmp expect actual ' +test_expect_success 'read null byte' ' + echo ">QhelloQ" | q_to_nul >expect && + q_to_nul <<-\EOF | test-line-buffer >actual && + binary 8 + QhelloQ + EOF + test_cmp expect actual +' + test_expect_success 'long reads are truncated' ' echo foo >expect && test-line-buffer <<-\EOF >actual && @@ -171,4 +180,13 @@ test_expect_success 'long copies are truncated' ' test_cmp expect actual ' +test_expect_success 'long binary reads are truncated' ' + echo ">foo" >expect && + test-line-buffer <<-\EOF >actual && + binary 5 + foo + EOF + test_cmp expect actual +' + test_done diff --git a/test-line-buffer.c b/test-line-buffer.c index ec19b13ee..19bf2d440 100644 --- a/test-line-buffer.c +++ b/test-line-buffer.c @@ -3,6 +3,7 @@ */ #include "git-compat-util.h" +#include "strbuf.h" #include "vcs-svn/line_buffer.h" static uint32_t strtouint32(const char *s) @@ -17,6 +18,15 @@ static uint32_t strtouint32(const char *s) static void handle_command(const char *command, const char *arg, struct line_buffer *buf) { switch (*command) { + case 'b': + if (!prefixcmp(command, "binary ")) { + struct strbuf sb = STRBUF_INIT; + strbuf_addch(&sb, '>'); + buffer_read_binary(buf, &sb, strtouint32(arg)); + fwrite(sb.buf, 1, sb.len, stdout); + strbuf_release(&sb); + return; + } case 'c': if (!prefixcmp(command, "copy ")) { buffer_copy_bytes(buf, strtouint32(arg)); diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 806932b32..661b00709 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -56,6 +56,12 @@ char *buffer_read_string(struct line_buffer *buf, uint32_t len) return ferror(buf->infile) ? NULL : buf->blob_buffer.buf; } +void buffer_read_binary(struct line_buffer *buf, + struct strbuf *sb, uint32_t size) +{ + strbuf_fread(sb, size, buf->infile); +} + void buffer_copy_bytes(struct line_buffer *buf, uint32_t len) { char byte_buffer[COPY_BUFFER_LEN]; diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index fb373903d..0c2d3d955 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -16,6 +16,7 @@ int buffer_init(struct line_buffer *buf, const char *filename); int buffer_deinit(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); +void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); void buffer_reset(struct line_buffer *buf); From cc193f1f0b45e4e65f246f1d5e6e8134844aa35b Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:06:32 -0600 Subject: [PATCH 28/31] vcs-svn: allow character-oriented input buffer_read_char can be used in place of buffer_read_string(1) to avoid consuming valuable static buffer space. The delta applier will use this to read variable-length integers one byte at a time. Underneath, it is fgetc, wrapped so the line_buffer library can maintain its role as gatekeeper of input. Later it might be worth checking if fgetc_unlocked is faster --- most line_buffer functions are not thread-safe anyway. Helpd-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 5 +++++ vcs-svn/line_buffer.h | 1 + 2 files changed, 6 insertions(+) diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 661b00709..37ec56e5b 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -27,6 +27,11 @@ int buffer_deinit(struct line_buffer *buf) return err; } +int buffer_read_char(struct line_buffer *buf) +{ + return fgetc(buf->infile); +} + /* Read a line without trailing newline. */ char *buffer_read_line(struct line_buffer *buf) { diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 0c2d3d955..0a59c73e8 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -16,6 +16,7 @@ int buffer_init(struct line_buffer *buf, const char *filename); int buffer_deinit(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); +int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); From cb3f87cf1ba90373fdc240d65a4d65434099d9a3 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:09:38 -0600 Subject: [PATCH 29/31] vcs-svn: allow input from file descriptor Based-on-patch-by: David Barr Signed-off-by: Jonathan Nieder --- t/t0081-line-buffer.sh | 9 +++++++++ test-line-buffer.c | 11 ++++++++--- vcs-svn/line_buffer.c | 8 ++++++++ vcs-svn/line_buffer.h | 1 + vcs-svn/line_buffer.txt | 9 +++++---- 5 files changed, 31 insertions(+), 7 deletions(-) diff --git a/t/t0081-line-buffer.sh b/t/t0081-line-buffer.sh index a8eeb2064..550fad082 100755 --- a/t/t0081-line-buffer.sh +++ b/t/t0081-line-buffer.sh @@ -131,6 +131,15 @@ test_expect_success PIPE,EXPENSIVE 'longer read (around 65536 bytes)' ' long_read_test 65536 ' +test_expect_success 'read from file descriptor' ' + rm -f input && + echo hello >expect && + echo hello >input && + echo copy 6 | + test-line-buffer "&4" 4actual && + test_cmp expect actual +' + test_expect_success 'buffer_read_string copes with null byte' ' >expect && q_to_nul <<-\EOF | test-line-buffer >actual && diff --git a/test-line-buffer.c b/test-line-buffer.c index 19bf2d440..25b20b93f 100644 --- a/test-line-buffer.c +++ b/test-line-buffer.c @@ -69,13 +69,18 @@ int main(int argc, char *argv[]) else if (argc == 2) filename = argv[1]; else - usage("test-line-buffer [file] < script"); + usage("test-line-buffer [file | &fd] < script"); if (buffer_init(&stdin_buf, NULL)) die_errno("open error"); if (filename) { - if (buffer_init(&file_buf, filename)) - die_errno("error opening %s", filename); + if (*filename == '&') { + if (buffer_fdinit(&file_buf, strtouint32(filename + 1))) + die_errno("error opening fd %s", filename + 1); + } else { + if (buffer_init(&file_buf, filename)) + die_errno("error opening %s", filename); + } input = &file_buf; } diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index 37ec56e5b..e29a81a53 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -17,6 +17,14 @@ int buffer_init(struct line_buffer *buf, const char *filename) return 0; } +int buffer_fdinit(struct line_buffer *buf, int fd) +{ + buf->infile = fdopen(fd, "r"); + if (!buf->infile) + return -1; + return 0; +} + int buffer_deinit(struct line_buffer *buf) { int err; diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 0a59c73e8..630d83c31 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -13,6 +13,7 @@ struct line_buffer { #define LINE_BUFFER_INIT {"", STRBUF_INIT, NULL} int buffer_init(struct line_buffer *buf, const char *filename); +int buffer_fdinit(struct line_buffer *buf, int fd); int buffer_deinit(struct line_buffer *buf); char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index f8eaa4dd8..4e8fb719c 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -27,10 +27,11 @@ resources. Functions --------- -`buffer_init`:: - Open the named file for input. If filename is NULL, - start reading from stdin. On failure, returns -1 (with - errno indicating the nature of the failure). +`buffer_init`, `buffer_fdinit`:: + Open the named file or file descriptor for input. + buffer_init(buf, NULL) prepares to read from stdin. + On failure, returns -1 (with errno indicating the nature + of the failure). `buffer_deinit`:: Stop reading from the current file (closing it unless From b1c9b798a6dd391aeaea31663a65164815701244 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sun, 2 Jan 2011 21:10:59 -0600 Subject: [PATCH 30/31] vcs-svn: teach line_buffer about temporary files It can sometimes be useful to write information temporarily to file, to read back later. These functions allow a program to use the line_buffer facilities when doing so. It works like this: 1. find a unique filename with buffer_tmpfile_init. 2. rewind with buffer_tmpfile_rewind. This returns a stdio handle for writing. 3. when finished writing, declare so with buffer_tmpfile_prepare_to_read. The return value indicates how many bytes were written. 4. read whatever portion of the file is needed. 5. if finished, remove the temporary file with buffer_deinit. otherwise, go back to step 2, The svn support would use this to buffer the postimage from delta application until the length is known and fast-import can receive the resulting blob. Based-on-patch-by: David Barr Signed-off-by: Jonathan Nieder --- vcs-svn/line_buffer.c | 24 ++++++++++++++++++++++++ vcs-svn/line_buffer.h | 7 ++++++- vcs-svn/line_buffer.txt | 22 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c index e29a81a53..aedf105b7 100644 --- a/vcs-svn/line_buffer.c +++ b/vcs-svn/line_buffer.c @@ -25,6 +25,14 @@ int buffer_fdinit(struct line_buffer *buf, int fd) return 0; } +int buffer_tmpfile_init(struct line_buffer *buf) +{ + buf->infile = tmpfile(); + if (!buf->infile) + return -1; + return 0; +} + int buffer_deinit(struct line_buffer *buf) { int err; @@ -35,6 +43,22 @@ int buffer_deinit(struct line_buffer *buf) return err; } +FILE *buffer_tmpfile_rewind(struct line_buffer *buf) +{ + rewind(buf->infile); + return buf->infile; +} + +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf) +{ + long pos = ftell(buf->infile); + if (pos < 0) + return error("ftell error: %s", strerror(errno)); + if (fseek(buf->infile, 0, SEEK_SET)) + return error("seek error: %s", strerror(errno)); + return pos; +} + int buffer_read_char(struct line_buffer *buf) { return fgetc(buf->infile); diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h index 630d83c31..96ce966a2 100644 --- a/vcs-svn/line_buffer.h +++ b/vcs-svn/line_buffer.h @@ -15,12 +15,17 @@ struct line_buffer { int buffer_init(struct line_buffer *buf, const char *filename); int buffer_fdinit(struct line_buffer *buf, int fd); int buffer_deinit(struct line_buffer *buf); +void buffer_reset(struct line_buffer *buf); + +int buffer_tmpfile_init(struct line_buffer *buf); +FILE *buffer_tmpfile_rewind(struct line_buffer *buf); /* prepare to write. */ +long buffer_tmpfile_prepare_to_read(struct line_buffer *buf); + char *buffer_read_line(struct line_buffer *buf); char *buffer_read_string(struct line_buffer *buf, uint32_t len); int buffer_read_char(struct line_buffer *buf); void buffer_read_binary(struct line_buffer *buf, struct strbuf *sb, uint32_t len); void buffer_copy_bytes(struct line_buffer *buf, uint32_t len); void buffer_skip_bytes(struct line_buffer *buf, uint32_t len); -void buffer_reset(struct line_buffer *buf); #endif diff --git a/vcs-svn/line_buffer.txt b/vcs-svn/line_buffer.txt index 4e8fb719c..e89cc41d5 100644 --- a/vcs-svn/line_buffer.txt +++ b/vcs-svn/line_buffer.txt @@ -24,6 +24,28 @@ The calling program: When finished, the caller can use `buffer_reset` to deallocate resources. +Using temporary files +--------------------- + +Temporary files provide a place to store data that should not outlive +the calling program. A program + + - initializes a `struct line_buffer` to LINE_BUFFER_INIT + - requests a temporary file with `buffer_tmpfile_init` + - acquires an output handle by calling `buffer_tmpfile_rewind` + - uses standard I/O functions like `fprintf` and `fwrite` to fill + the temporary file + - declares writing is over with `buffer_tmpfile_prepare_to_read` + - can re-read what was written with `buffer_read_line`, + `buffer_read_string`, and so on + - can reuse the temporary file by calling `buffer_tmpfile_rewind` + again + - removes the temporary file with `buffer_deinit`, perhaps to + reuse the line_buffer for some other file. + +When finished, the calling program can use `buffer_reset` to deallocate +resources. + Functions --------- From 6288e3e180c0b911e6f7062f1e744a25568f7d22 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Mon, 28 Feb 2011 15:16:59 -0600 Subject: [PATCH 31/31] fast-import: make code "-Wpointer-arith" clean The dereference() function to peel a tree-ish and find the underlying tree expects arithmetic to (void *) to work on byte addresses. We should be reading the text of objects through a char * anyway. Noticed-by: Junio C Hamano Signed-off-by: Jonathan Nieder --- fast-import.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fast-import.c b/fast-import.c index 6c37b8400..e1268b8cb 100644 --- a/fast-import.c +++ b/fast-import.c @@ -2848,7 +2848,7 @@ static struct object_entry *dereference(struct object_entry *oe, unsigned char sha1[20]) { unsigned long size; - void *buf = NULL; + char *buf = NULL; if (!oe) { enum object_type type = sha1_object_info(sha1, NULL); if (type < 0)