From 8676eb43133cebe5121b8426f0e67f32c5cdefaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Feb 2006 15:51:24 -0800 Subject: [PATCH 1/6] Make git diff-generation use a simpler spawn-like interface Instead of depending of fork() and execve() and doing things in between the two, make the git diff functions do everything up front, and then do a single "spawn_prog()" invocation to run the actual external diff program (if any is even needed). This actually ends up simplifying the code, and should make it much easier to make it efficient under broken operating systems (read: Windows). Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff.c | 138 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 58 deletions(-) diff --git a/diff.c b/diff.c index 804c08c2c..c0548eed9 100644 --- a/diff.c +++ b/diff.c @@ -178,11 +178,12 @@ static void emit_rewrite_diff(const char *name_a, copy_file('+', temp[1].name); } -static void builtin_diff(const char *name_a, +static const char *builtin_diff(const char *name_a, const char *name_b, struct diff_tempfile *temp, const char *xfrm_msg, - int complete_rewrite) + int complete_rewrite, + const char **args) { int i, next_at, cmd_size; const char *const diff_cmd = "diff -L%s -L%s"; @@ -242,19 +243,24 @@ static void builtin_diff(const char *name_a, } if (xfrm_msg && xfrm_msg[0]) puts(xfrm_msg); + /* + * we do not run diff between different kind + * of objects. + */ if (strncmp(temp[0].mode, temp[1].mode, 3)) - /* we do not run diff between different kind - * of objects. - */ - exit(0); + return NULL; if (complete_rewrite) { - fflush(NULL); emit_rewrite_diff(name_a, name_b, temp); - exit(0); + return NULL; } } - fflush(NULL); - execlp("/bin/sh","sh", "-c", cmd, NULL); + + /* This is disgusting */ + *args++ = "sh"; + *args++ = "-c"; + *args++ = cmd; + *args = NULL; + return "/bin/sh"; } struct diff_filespec *alloc_filespec(const char *path) @@ -559,6 +565,40 @@ static void remove_tempfile_on_signal(int signo) raise(signo); } +static int spawn_prog(const char *pgm, const char **arg) +{ + pid_t pid; + int status; + + fflush(NULL); + pid = fork(); + if (pid < 0) + die("unable to fork"); + if (!pid) { + execvp(pgm, (char *const*) arg); + exit(255); + } + + while (waitpid(pid, &status, 0) < 0) { + if (errno == EINTR) + continue; + return -1; + } + + /* Earlier we did not check the exit status because + * diff exits non-zero if files are different, and + * we are not interested in knowing that. It was a + * mistake which made it harder to quit a diff-* + * session that uses the git-apply-patch-script as + * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF + * should also exit non-zero only when it wants to + * abort the entire diff-* session. + */ + if (WIFEXITED(status) && !WEXITSTATUS(status)) + return 0; + return -1; +} + /* An external diff command takes: * * diff-cmd name infile1 infile1-sha1 infile1-mode \ @@ -573,9 +613,9 @@ static void run_external_diff(const char *pgm, const char *xfrm_msg, int complete_rewrite) { + const char *spawn_arg[10]; struct diff_tempfile *temp = diff_temp; - pid_t pid; - int status; + int retval; static int atexit_asked = 0; const char *othername; @@ -592,59 +632,41 @@ static void run_external_diff(const char *pgm, signal(SIGINT, remove_tempfile_on_signal); } - fflush(NULL); - pid = fork(); - if (pid < 0) - die("unable to fork"); - if (!pid) { - if (pgm) { - if (one && two) { - const char *exec_arg[10]; - const char **arg = &exec_arg[0]; - *arg++ = pgm; - *arg++ = name; - *arg++ = temp[0].name; - *arg++ = temp[0].hex; - *arg++ = temp[0].mode; - *arg++ = temp[1].name; - *arg++ = temp[1].hex; - *arg++ = temp[1].mode; - if (other) { - *arg++ = other; - *arg++ = xfrm_msg; - } - *arg = NULL; - execvp(pgm, (char *const*) exec_arg); + if (pgm) { + const char **arg = &spawn_arg[0]; + if (one && two) { + *arg++ = pgm; + *arg++ = name; + *arg++ = temp[0].name; + *arg++ = temp[0].hex; + *arg++ = temp[0].mode; + *arg++ = temp[1].name; + *arg++ = temp[1].hex; + *arg++ = temp[1].mode; + if (other) { + *arg++ = other; + *arg++ = xfrm_msg; } - else - execlp(pgm, pgm, name, NULL); + } else { + *arg++ = pgm; + *arg++ = name; } - /* - * otherwise we use the built-in one. - */ - if (one && two) - builtin_diff(name, othername, temp, xfrm_msg, - complete_rewrite); - else + *arg = NULL; + } else { + if (one && two) { + pgm = builtin_diff(name, othername, temp, xfrm_msg, complete_rewrite, spawn_arg); + } else printf("* Unmerged path %s\n", name); - exit(0); } - if (waitpid(pid, &status, 0) < 0 || - !WIFEXITED(status) || WEXITSTATUS(status)) { - /* Earlier we did not check the exit status because - * diff exits non-zero if files are different, and - * we are not interested in knowing that. It was a - * mistake which made it harder to quit a diff-* - * session that uses the git-apply-patch-script as - * the GIT_EXTERNAL_DIFF. A custom GIT_EXTERNAL_DIFF - * should also exit non-zero only when it wants to - * abort the entire diff-* session. - */ - remove_tempfile(); + + retval = 0; + if (pgm) + retval = spawn_prog(pgm, spawn_arg); + remove_tempfile(); + if (retval) { fprintf(stderr, "external diff died, stopping at %s.\n", name); exit(1); } - remove_tempfile(); } static void diff_fill_sha1_info(struct diff_filespec *one) From 525c0d713c101cf795980aec3baa08f49ec7147f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20=20Hasselstr=C3=B6m?= Date: Sun, 26 Feb 2006 06:11:24 +0100 Subject: [PATCH 2/6] svnimport: Mention -r in usage summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I added the -r option to git-svnimport some time ago, but forgot to update the usage summary in the documentation. Signed-off-by: Karl Hasselström Signed-off-by: Junio C Hamano --- Documentation/git-svnimport.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/git-svnimport.txt b/Documentation/git-svnimport.txt index 5c543d5d1..b5c772147 100644 --- a/Documentation/git-svnimport.txt +++ b/Documentation/git-svnimport.txt @@ -10,10 +10,10 @@ git-svnimport - Import a SVN repository into git SYNOPSIS -------- 'git-svnimport' [ -o ] [ -h ] [ -v ] [ -d | -D ] - [ -C ] [ -i ] [ -u ] [-l limit_rev] - [ -b branch_subdir ] [ -T trunk_subdir ] [ -t tag_subdir ] - [ -s start_chg ] [ -m ] [ -M regex ] - [ ] + [ -C ] [ -i ] [ -u ] [-l limit_rev] + [ -b branch_subdir ] [ -T trunk_subdir ] [ -t tag_subdir ] + [ -s start_chg ] [ -m ] [ -r ] [ -M regex ] + [ ] DESCRIPTION From 4802426ddbf518f30f53cc572d619495ab52e4f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20=20Hasselstr=C3=B6m?= Date: Sun, 26 Feb 2006 06:11:27 +0100 Subject: [PATCH 3/6] svnimport: Convert executable flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the svn:executable property to file mode 755 when converting an SVN repository to GIT. Signed-off-by: Karl Hasselström Signed-off-by: Junio C Hamano --- git-svnimport.perl | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/git-svnimport.perl b/git-svnimport.perl index ee2940f48..6603b96d6 100755 --- a/git-svnimport.perl +++ b/git-svnimport.perl @@ -112,16 +112,22 @@ sub file { DIR => File::Spec->tmpdir(), UNLINK => 1); print "... $rev $path ...\n" if $opt_v; - my $pool = SVN::Pool->new(); - eval { $self->{'svn'}->get_file($path,$rev,$fh,$pool); }; - $pool->clear; + my (undef, $properties); + eval { (undef, $properties) + = $self->{'svn'}->get_file($path,$rev,$fh); }; if($@) { return undef if $@ =~ /Attempted to get checksum/; die $@; } + my $mode; + if (exists $properties->{'svn:executable'}) { + $mode = '0755'; + } else { + $mode = '0644'; + } close ($fh); - return $name; + return ($name, $mode); } package main; @@ -296,7 +302,7 @@ ($$$) my $svnpath = revert_split_path($branch,$path); # now get it - my $name; + my ($name,$mode); if($opt_d) { my($req,$res); @@ -316,8 +322,9 @@ ($$$) return undef if $res->code == 301; # directory? die $res->status_line." at $url\n"; } + $mode = '0644'; # can't obtain mode via direct http request? } else { - $name = $svn->file("$svnpath",$rev); + ($name,$mode) = $svn->file("$svnpath",$rev); return undef unless defined $name; } @@ -331,7 +338,6 @@ ($$$) chomp $sha; close $F; unlink $name; - my $mode = "0644"; # SV does not seem to store any file modes return [$mode, $sha, $path]; } From c55f3fff35ac34e6d1f0f686e27029612775e51d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karl=20=20Hasselstr=C3=B6m?= Date: Sun, 26 Feb 2006 06:11:29 +0100 Subject: [PATCH 4/6] svnimport: Convert the svn:ignore property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Put the value of the svn:ignore property in a regular file when converting a Subversion repository to GIT. The Subversion and GIT ignore syntaxes are similar enough that it often just works to set the filename to .gitignore and do nothing else. Signed-off-by: Karl Hasselström Signed-off-by: Junio C Hamano --- Documentation/git-svnimport.txt | 8 ++++- git-svnimport.perl | 60 +++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/Documentation/git-svnimport.txt b/Documentation/git-svnimport.txt index b5c772147..c95ff84f6 100644 --- a/Documentation/git-svnimport.txt +++ b/Documentation/git-svnimport.txt @@ -13,7 +13,7 @@ SYNOPSIS [ -C ] [ -i ] [ -u ] [-l limit_rev] [ -b branch_subdir ] [ -T trunk_subdir ] [ -t tag_subdir ] [ -s start_chg ] [ -m ] [ -r ] [ -M regex ] - [ ] + [ -I ] [ ] DESCRIPTION @@ -65,6 +65,12 @@ When importing incrementally, you might need to edit the .git/svn2git file. Prepend 'rX: ' to commit messages, where X is the imported subversion revision. +-I :: + Import the svn:ignore directory property to files with this + name in each directory. (The Subversion and GIT ignore + syntaxes are similar enough that using the Subversion patterns + directly with "-I .gitignore" will almost always just work.) + -m:: Attempt to detect merges based on the commit message. This option will enable default regexes that try to capture the name source diff --git a/git-svnimport.perl b/git-svnimport.perl index 6603b96d6..0dd9fab9f 100755 --- a/git-svnimport.perl +++ b/git-svnimport.perl @@ -29,19 +29,21 @@ $SIG{'PIPE'}="IGNORE"; $ENV{'TZ'}="UTC"; -our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T,$opt_b,$opt_r,$opt_s,$opt_l,$opt_d,$opt_D); +our($opt_h,$opt_o,$opt_v,$opt_u,$opt_C,$opt_i,$opt_m,$opt_M,$opt_t,$opt_T, + $opt_b,$opt_r,$opt_I,$opt_s,$opt_l,$opt_d,$opt_D); sub usage() { print STDERR <{'svn'}->get_dir($path,$rev,undef); + if (exists $properties->{'svn:ignore'}) { + my ($fh, $name) = tempfile('gitsvn.XXXXXX', + DIR => File::Spec->tmpdir(), + UNLINK => 1); + print $fh $properties->{'svn:ignore'}; + close($fh); + return $name; + } else { + return undef; + } +} + package main; use URI; @@ -341,6 +361,34 @@ ($$$) return [$mode, $sha, $path]; } +sub get_ignore($$$$$) { + my($new,$old,$rev,$branch,$path) = @_; + + return unless $opt_I; + my $svnpath = revert_split_path($branch,$path); + my $name = $svn->ignore("$svnpath",$rev); + if ($path eq '/') { + $path = $opt_I; + } else { + $path = File::Spec->catfile($path,$opt_I); + } + if (defined $name) { + my $pid = open(my $F, '-|'); + die $! unless defined $pid; + if (!$pid) { + exec("git-hash-object", "-w", $name) + or die "Cannot create object: $!\n"; + } + my $sha = <$F>; + chomp $sha; + close $F; + unlink $name; + push(@$new,['0644',$sha,$path]); + } else { + push(@$old,$path); + } +} + sub split_path($$) { my($rev,$path) = @_; my $branch; @@ -546,6 +594,9 @@ sub commit { my $opath = $action->[3]; print STDERR "$revision: $branch: could not fetch '$opath'\n"; } + } elsif ($node_kind eq $SVN::Node::dir) { + get_ignore(\@new, \@old, $revision, + $branch,$path); } } elsif ($action->[0] eq "D") { push(@old,$path); @@ -554,6 +605,9 @@ sub commit { if ($node_kind eq $SVN::Node::file) { my $f = get_file($revision,$branch,$path); push(@new,$f) if $f; + } elsif ($node_kind eq $SVN::Node::dir) { + get_ignore(\@new, \@old, $revision, + $branch,$path); } } else { die "$revision: unknown action '".$action->[0]."' for $path\n"; From 19bfcd5a14e647763e5ecc247ba24790f56721f3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Feb 2006 09:29:00 -0800 Subject: [PATCH 5/6] The war on trailing whitespace On Sat, 25 Feb 2006, Andrew Morton wrote: > > I'd suggest a) git will simply refuse to apply such a patch unless given a > special `forcing' flag, b) even when thus forced, it will still warn and c) > with a different flag, it will strip-then-apply, without generating a > warning. This doesn't do the "strip-then-apply" thing, but it allows you to make git-apply generate a warning or error on extraneous whitespace. Use --whitespace=warn to warn, and (surprise, surprise) --whitespace=error to make it a fatal error to have whitespace at the end. Totally untested, of course. But it compiles, so it must be fine. HOWEVER! Note that this literally will check every single patch-line with "+" at the beginning. Which means that if you fix a simple typo, and the line had a space at the end before, and you didn't remove it, that's still considered a "new line with whitespace at the end", even though obviously the line wasn't really new. I assume this is what you wanted, and there isn't really any sane alternatives (you could make the warning activate only for _pure_ additions with no deletions at all in that hunk, but that sounds a bit insane). Linus --- apply.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/apply.c b/apply.c index 244718ca1..e7b3dcad4 100644 --- a/apply.c +++ b/apply.c @@ -34,6 +34,12 @@ static int line_termination = '\n'; static const char apply_usage[] = "git-apply [--stat] [--numstat] [--summary] [--check] [--index] [--apply] [--no-add] [--index-info] [--allow-binary-replacement] [-z] [-pNUM] ..."; +static enum whitespace_eol { + nowarn, + warn_on_whitespace, + error_on_whitespace +} new_whitespace = nowarn; + /* * For "diff-stat" like behaviour, we keep track of the biggest change * we've seen, and the longest filename. That allows us to do simple @@ -815,6 +821,22 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s oldlines--; break; case '+': + /* + * We know len is at least two, since we have a '+' and + * we checked that the last character was a '\n' above + */ + if (isspace(line[len-2])) { + switch (new_whitespace) { + case nowarn: + break; + case warn_on_whitespace: + new_whitespace = nowarn; /* Just once */ + error("Added whitespace at end of line at line %d", linenr); + break; + case error_on_whitespace: + die("Added whitespace at end of line at line %d", linenr); + } + } added++; newlines--; break; @@ -1839,6 +1861,17 @@ int main(int argc, char **argv) line_termination = 0; continue; } + if (!strncmp(arg, "--whitespace=", 13)) { + if (strcmp(arg+13, "warn")) { + new_whitespace = warn_on_whitespace; + continue; + } + if (strcmp(arg+13, "error")) { + new_whitespace = error_on_whitespace; + continue; + } + die("unrecognixed whitespace option '%s'", arg+13); + } if (check_index && prefix_length < 0) { prefix = setup_git_directory(); From b5767dd6605364002a96a9d039cd1f010707b4f6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 26 Feb 2006 18:13:25 -0800 Subject: [PATCH 6/6] apply --whitespace fixes and enhancements. In addition to fixing obvious command line parsing bugs in the previous round, this changes the following: * Adds "--whitespace=strip". This applies after stripping the new trailing whitespaces introduced to the patch. * The output error message format is changed to say "patch-filename:linenumber:contents of the line". This makes it similar to typical compiler error message format, and helps C-x ` (next-error) in Emacs compilation buffer. * --whitespace=error and --whitespace=warn do not stop at the first error. We might want to limit the output to say first 20 such lines to prevent cluttering, but on the other hand if you are willing to hand-fix after inspecting them, getting everything with a single run might be easier to work with. After all, somebody has to do the clean-up work somewhere. Signed-off-by: Junio C Hamano --- apply.c | 77 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/apply.c b/apply.c index e7b3dcad4..7dbbeb47a 100644 --- a/apply.c +++ b/apply.c @@ -37,8 +37,11 @@ static const char apply_usage[] = static enum whitespace_eol { nowarn, warn_on_whitespace, - error_on_whitespace + error_on_whitespace, + strip_and_apply, } new_whitespace = nowarn; +static int whitespace_error = 0; +static const char *patch_input_file = NULL; /* * For "diff-stat" like behaviour, we keep track of the biggest change @@ -823,19 +826,17 @@ static int parse_fragment(char *line, unsigned long size, struct patch *patch, s case '+': /* * We know len is at least two, since we have a '+' and - * we checked that the last character was a '\n' above + * we checked that the last character was a '\n' above. + * That is, an addition of an empty line would check + * the '+' here. Sneaky... */ - if (isspace(line[len-2])) { - switch (new_whitespace) { - case nowarn: - break; - case warn_on_whitespace: - new_whitespace = nowarn; /* Just once */ - error("Added whitespace at end of line at line %d", linenr); - break; - case error_on_whitespace: - die("Added whitespace at end of line at line %d", linenr); - } + if ((new_whitespace != nowarn) && + isspace(line[len-2])) { + fprintf(stderr, "Added whitespace\n"); + fprintf(stderr, "%s:%d:%.*s\n", + patch_input_file, + linenr, len-2, line+1); + whitespace_error = 1; } added++; newlines--; @@ -1114,6 +1115,27 @@ struct buffer_desc { unsigned long alloc; }; +static int apply_line(char *output, const char *patch, int plen) +{ + /* plen is number of bytes to be copied from patch, + * starting at patch+1 (patch[0] is '+'). Typically + * patch[plen] is '\n'. + */ + int add_nl_to_tail = 0; + if ((new_whitespace == strip_and_apply) && + 1 < plen && isspace(patch[plen-1])) { + if (patch[plen] == '\n') + add_nl_to_tail = 1; + plen--; + while (0 < plen && isspace(patch[plen])) + plen--; + } + memcpy(output, patch + 1, plen); + if (add_nl_to_tail) + output[plen++] = '\n'; + return plen; +} + static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) { char *buf = desc->buffer; @@ -1149,10 +1171,9 @@ static int apply_one_fragment(struct buffer_desc *desc, struct fragment *frag) break; /* Fall-through for ' ' */ case '+': - if (*patch != '+' || !no_add) { - memcpy(new + newsize, patch + 1, plen); - newsize += plen; - } + if (*patch != '+' || !no_add) + newsize += apply_line(new + newsize, patch, + plen); break; case '@': case '\\': /* Ignore it, we already handled it */ @@ -1721,7 +1742,7 @@ static int use_patch(struct patch *p) return 1; } -static int apply_patch(int fd) +static int apply_patch(int fd, const char *filename) { int newfd; unsigned long offset, size; @@ -1729,6 +1750,7 @@ static int apply_patch(int fd) struct patch *list = NULL, **listp = &list; int skipped_patch = 0; + patch_input_file = filename; if (!buffer) return -1; offset = 0; @@ -1755,6 +1777,9 @@ static int apply_patch(int fd) } newfd = -1; + if (whitespace_error && (new_whitespace == error_on_whitespace)) + apply = 0; + write_index = check_index && apply; if (write_index) newfd = hold_index_file_for_update(&cache_file, get_index_file()); @@ -1801,7 +1826,7 @@ int main(int argc, char **argv) int fd; if (!strcmp(arg, "-")) { - apply_patch(0); + apply_patch(0, ""); read_stdin = 0; continue; } @@ -1862,14 +1887,18 @@ int main(int argc, char **argv) continue; } if (!strncmp(arg, "--whitespace=", 13)) { - if (strcmp(arg+13, "warn")) { + if (!strcmp(arg+13, "warn")) { new_whitespace = warn_on_whitespace; continue; } - if (strcmp(arg+13, "error")) { + if (!strcmp(arg+13, "error")) { new_whitespace = error_on_whitespace; continue; } + if (!strcmp(arg+13, "strip")) { + new_whitespace = strip_and_apply; + continue; + } die("unrecognixed whitespace option '%s'", arg+13); } @@ -1885,10 +1914,12 @@ int main(int argc, char **argv) if (fd < 0) usage(apply_usage); read_stdin = 0; - apply_patch(fd); + apply_patch(fd, arg); close(fd); } if (read_stdin) - apply_patch(0); + apply_patch(0, ""); + if (whitespace_error && new_whitespace == error_on_whitespace) + return 1; return 0; }