From 6a004d3f2e531936d6d91324a0610a874f91a867 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 21 Oct 2008 14:12:15 -0700 Subject: [PATCH 1/5] git-svn: don't escape tilde ('~') for http(s) URLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to Jose Carlos Garcia Sogo and Björn Steinbrink for the bug report. On 2008.10.18 23:39:19 +0200, Björn Steinbrink wrote: > Hi, > > Jose Carlos Garcia Sogo reported on #git that a git-svn clone of this > svn repo fails for him: > https://sucs.org/~welshbyte/svn/backuptool/trunk > > I can reproduce that here with: > git-svn version 1.6.0.2.541.g46dc1.dirty (svn 1.5.1) > > The error message I get is: > Apache got a malformed URI: Unusable URI: it does not refer to this > repository at /usr/local/libexec/git-core/git-svn line 4057 > > strace revealed that git-svn url-encodes ~ while svn does not do that. > > For svn we have: > write(5, " > https://sucs.org/~welshbyte/svn/backuptool/trunk... > > While git-svn shows: > write(7, " > https://sucs.org/%7Ewelshbyte/svn/backuptool/trunk... Signed-off-by: Eric Wong --- git-svn.perl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/git-svn.perl b/git-svn.perl index 5702b100f..70a664ca0 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -852,7 +852,7 @@ sub escape_uri_only { my ($uri) = @_; my @tmp; foreach (split m{/}, $uri) { - s/([^\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; + s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; push @tmp, $_; } join('/', @tmp); @@ -3537,7 +3537,7 @@ sub repo_path { sub url_path { my ($self, $path) = @_; if ($self->{url} =~ m#^https?://#) { - $path =~ s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg; + $path =~ s/([^~a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg; } $self->{url} . '/' . $self->repo_path($path); } @@ -3890,7 +3890,7 @@ sub escape_uri_only { my ($uri) = @_; my @tmp; foreach (split m{/}, $uri) { - s/([^\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; + s/([^~\w.%+-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg; push @tmp, $_; } join('/', @tmp); From 16fc08e2d86dad152194829d21bc55b2ef0c8fb1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 29 Oct 2008 23:49:26 -0700 Subject: [PATCH 2/5] git-svn: respect i18n.commitencoding config SVN itself always stores log messages in the repository as UTF-8. git always stores/retrieves everything as raw binary data with no transformations whatsoever. To interact with SVN, we need to encode log messages as UTF-8 before sending them to SVN, as SVN cannot do it for us. When retrieving log messages from SVN, we also need to (attempt to) reencode the UTF-8 log message back to the user-specified commit encoding. Note, handling i18n.logoutputencoding for "git svn log" also needs to be done in a future change. Also, this change only deals with the encoding of commit messages and nothing else (path names, blob content, ...). In-Reply-To: <8b168cfb0810282014r789ac01dnec51824de1078f0@mail.gmail.com> James North wrote: > Hi, > > I'm using git-svn on a system with ISO-8859-1 encoding. The problem is > when I try to use "git svn dcommit" to send changes to a remote svn > (also ISO-8859-1). > > Seems like git-svn is sending commit messages with utf-8 (just a > guessing...) and they look bad on the remote svn log. E.g. "Ca?\241a > de cami?\243n" > > I have tried using i18n.commitencoding=ISO-8859-1 as suggested by the > warning when doing "git svn dcommit" but messages still are sent with > wrong encoding. Signed-off-by: Eric Wong --- git-svn.perl | 24 +++++++- t/t9129-git-svn-i18n-commitencoding.sh | 80 ++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 3 deletions(-) create mode 100755 t/t9129-git-svn-i18n-commitencoding.sh diff --git a/git-svn.perl b/git-svn.perl index 70a664ca0..2abb7b593 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -1136,9 +1136,19 @@ sub get_commit_entry { system($editor, $commit_editmsg); } rename $commit_editmsg, $commit_msg or croak $!; - open $log_fh, '<', $commit_msg or croak $!; - { local $/; chomp($log_entry{log} = <$log_fh>); } - close $log_fh or croak $!; + { + # SVN requires messages to be UTF-8 when entering the repo + local $/; + open $log_fh, '<', $commit_msg or croak $!; + binmode $log_fh; + chomp($log_entry{log} = <$log_fh>); + + if (my $enc = Git::config('i18n.commitencoding')) { + require Encode; + Encode::from_to($log_entry{log}, $enc, 'UTF-8'); + } + close $log_fh or croak $!; + } unlink $commit_msg; \%log_entry; } @@ -2273,6 +2283,14 @@ sub do_git_commit { } defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec)) or croak $!; + binmode $msg_fh; + + # we always get UTF-8 from SVN, but we may want our commits in + # a different encoding. + if (my $enc = Git::config('i18n.commitencoding')) { + require Encode; + Encode::from_to($log_entry->{log}, 'UTF-8', $enc); + } print $msg_fh $log_entry->{log} or croak $!; restore_commit_header_env($old_env); unless ($self->no_metadata) { diff --git a/t/t9129-git-svn-i18n-commitencoding.sh b/t/t9129-git-svn-i18n-commitencoding.sh new file mode 100755 index 000000000..2848e46e3 --- /dev/null +++ b/t/t9129-git-svn-i18n-commitencoding.sh @@ -0,0 +1,80 @@ +#!/bin/sh +# +# Copyright (c) 2008 Eric Wong + +test_description='git svn honors i18n.commitEncoding in config' + +. ./lib-git-svn.sh + +compare_git_head_with () { + nr=`wc -l < "$1"` + a=7 + b=$(($a + $nr - 1)) + git cat-file commit HEAD | sed -ne "$a,${b}p" >current && + test_cmp current "$1" +} + +compare_svn_head_with () { + LC_ALL=en_US.UTF-8 svn log --limit 1 `git svn info --url` | \ + sed -e 1,3d -e "/^-\+\$/d" >current && + test_cmp current "$1" +} + +for H in ISO-8859-1 EUCJP ISO-2022-JP +do + test_expect_success "$H setup" ' + mkdir $H && + svn import -m "$H test" $H "$svnrepo"/$H && + git svn clone "$svnrepo"/$H $H + ' +done + +for H in ISO-8859-1 EUCJP ISO-2022-JP +do + test_expect_success "$H commit on git side" ' + ( + cd $H && + git config i18n.commitencoding $H && + git checkout -b t refs/remotes/git-svn && + echo $H >F && + git add F && + git commit -a -F "$TEST_DIRECTORY"/t3900/$H.txt && + E=$(git cat-file commit HEAD | sed -ne "s/^encoding //p") && + test "z$E" = "z$H" + compare_git_head_with "$TEST_DIRECTORY"/t3900/$H.txt + ) + ' +done + +for H in ISO-8859-1 EUCJP ISO-2022-JP +do + test_expect_success "$H dcommit to svn" ' + ( + cd $H && + git svn dcommit && + git cat-file commit HEAD | grep git-svn-id: && + E=$(git cat-file commit HEAD | sed -ne "s/^encoding //p") && + test "z$E" = "z$H" && + compare_git_head_with "$TEST_DIRECTORY"/t3900/$H.txt + ) + ' +done + +test_expect_success 'ISO-8859-1 should match UTF-8 in svn' ' +( + cd ISO-8859-1 && + compare_svn_head_with "$TEST_DIRECTORY"/t3900/1-UTF-8.txt +) +' + +for H in EUCJP ISO-2022-JP +do + test_expect_success '$H should match UTF-8 in svn' ' + ( + cd $H && + compare_svn_head_with "$TEST_DIRECTORY"/t3900/2-UTF-8.txt + ) + ' +done + +test_done From fe4003f6308b88bcc44a989f12e1baddd97f22d4 Mon Sep 17 00:00:00 2001 From: Deskin Miller Date: Thu, 6 Nov 2008 00:07:39 -0500 Subject: [PATCH 3/5] git-svn: proper detection of bare repositories When in a bare repository (or .git, for that matter), git-svn would fail to initialise properly, since git rev-parse --show-cdup would not output anything. However, git rev-parse --show-cdup actually returns an error code if it's really not in a git directory. Fix the issue by checking for an explicit error from git rev-parse, and setting $git_dir appropriately if instead it just does not output. Signed-off-by: Deskin Miller Acked-by: Eric Wong --- git-svn.perl | 12 +++++++----- t/t9100-git-svn-basic.sh | 9 +++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/git-svn.perl b/git-svn.perl index 2abb7b593..86075ec61 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -223,11 +223,13 @@ BEGIN "but it is not a directory\n"; } my $git_dir = delete $ENV{GIT_DIR}; - chomp(my $cdup = command_oneline(qw/rev-parse --show-cdup/)); - unless (length $cdup) { - die "Already at toplevel, but $git_dir ", - "not found '$cdup'\n"; - } + my $cdup = undef; + git_cmd_try { + $cdup = command_oneline(qw/rev-parse --show-cdup/); + $git_dir = '.' unless ($cdup); + chomp $cdup if ($cdup); + $cdup = "." unless ($cdup && length $cdup); + } "Already at toplevel, but $git_dir not found\n"; chdir $cdup or die "Unable to chdir up to '$cdup'\n"; unless (-d $git_dir) { die "$git_dir still not found after going to ", diff --git a/t/t9100-git-svn-basic.sh b/t/t9100-git-svn-basic.sh index 9b238c329..bb921af56 100755 --- a/t/t9100-git-svn-basic.sh +++ b/t/t9100-git-svn-basic.sh @@ -265,4 +265,13 @@ test_expect_success 'able to set-tree to a subdirectory' " test -z \"\`git diff refs/heads/my-bar refs/remotes/bar\`\" " +test_expect_success 'git-svn works in a bare repository' ' + mkdir bare-repo && + ( cd bare-repo && + git init --bare && + GIT_DIR=. git svn init "$svnrepo" && + git svn fetch ) && + rm -rf bare-repo + ' + test_done From bcdd1b4456998256df19ea0347bf4a7fa5410eef Mon Sep 17 00:00:00 2001 From: "Marten Svanfeldt (dev)" Date: Thu, 13 Nov 2008 20:04:09 +0800 Subject: [PATCH 4/5] Git.pm: Make _temp_cache use the repository directory Update the usage of File::Temp->tempfile to place the temporary files within the repository directory instead of just letting Perl decide what directory to use, given there is a repository specified when requesting the temporary file. This is needed to be able to fix git-svn on msys as msysperl generates paths with UNIX-style paths (/tmp/xxx) while the git tools expect natvie path format (c:/..). The repository dir is stored in native format so by using it as the base directory for temporary files we always get a usable native full path. Signed-off-by: Marten Svanfeldt Acked-by: Eric Wong --- perl/Git.pm | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/perl/Git.pm b/perl/Git.pm index ba9445378..dde9105df 100644 --- a/perl/Git.pm +++ b/perl/Git.pm @@ -961,9 +961,7 @@ issue. =cut sub temp_acquire { - my ($self, $name) = _maybe_self(@_); - - my $temp_fd = _temp_cache($name); + my $temp_fd = _temp_cache(@_); $TEMP_FILES{$temp_fd}{locked} = 1; $temp_fd; @@ -1005,7 +1003,7 @@ sub temp_release { } sub _temp_cache { - my ($name) = @_; + my ($self, $name) = _maybe_self(@_); _verify_require(); @@ -1022,9 +1020,16 @@ sub _temp_cache { "' was closed. Opening replacement."; } my $fname; + + my $tmpdir; + if (defined $self) { + $tmpdir = $self->repo_path(); + } + ($$temp_fd, $fname) = File::Temp->tempfile( - 'Git_XXXXXX', UNLINK => 1 + 'Git_XXXXXX', UNLINK => 1, DIR => $tmpdir, ) or throw Error::Simple("couldn't open new temp file"); + $$temp_fd->autoflush; binmode $$temp_fd; $TEMP_FILES{$$temp_fd}{fname} = $fname; From 1b3069a753e2a2a3794db9235ea79747833cad65 Mon Sep 17 00:00:00 2001 From: "Marten Svanfeldt (dev)" Date: Thu, 13 Nov 2008 00:38:06 +0800 Subject: [PATCH 5/5] git-svn: Update git-svn to use the ability to place temporary files within repository directory This fixes git-svn within msys where Perl will provide temporary files with path such as /tmp while the git suit expects native Windows paths. Signed-off-by: Marten Svanfeldt Acked-by: Eric Wong --- git-svn.perl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/git-svn.perl b/git-svn.perl index 86075ec61..914c707a9 100755 --- a/git-svn.perl +++ b/git-svn.perl @@ -3332,11 +3332,11 @@ sub change_file_prop { sub apply_textdelta { my ($self, $fb, $exp) = @_; - my $fh = Git::temp_acquire('svn_delta'); + my $fh = $::_repository->temp_acquire('svn_delta'); # $fh gets auto-closed() by SVN::TxDelta::apply(), # (but $base does not,) so dup() it for reading in close_file open my $dup, '<&', $fh or croak $!; - my $base = Git::temp_acquire('git_blob'); + my $base = $::_repository->temp_acquire('git_blob'); if ($fb->{blob}) { print $base 'link ' if ($fb->{mode_a} == 120000); my $size = $::_repository->cat_blob($fb->{blob}, $base); @@ -3377,7 +3377,8 @@ sub close_file { warn "$path has mode 120000", " but is not a link\n"; } else { - my $tmp_fh = Git::temp_acquire('svn_hash'); + my $tmp_fh = $::_repository->temp_acquire( + 'svn_hash'); my $res; while ($res = sysread($fh, my $str, 1024)) { my $out = syswrite($tmp_fh, $str, $res); @@ -3765,7 +3766,7 @@ sub change_file_prop { sub _chg_file_get_blob ($$$$) { my ($self, $fbat, $m, $which) = @_; - my $fh = Git::temp_acquire("git_blob_$which"); + my $fh = $::_repository->temp_acquire("git_blob_$which"); if ($m->{"mode_$which"} =~ /^120/) { print $fh 'link ' or croak $!; $self->change_file_prop($fbat,'svn:special','*');