From 4b340cfab9c7a18e39bc531d6a6ffaffdf95f62d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 11 Mar 2012 01:25:43 -0800 Subject: [PATCH 01/32] ident.c: add split_ident_line() to parse formatted ident line The commit formatting logic format_person_part() in pretty.c implements the logic to split an author/committer ident line into its parts, intermixed with logic to compute its output using these piece it computes. Separate the former out to a helper function split_ident_line() so that other codepath can use the same logic, and rewrite the function using the helper function. Signed-off-by: Junio C Hamano --- cache.h | 16 +++++++++++++ ident.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ pretty.c | 64 +++++++++++++++++----------------------------------- 3 files changed, 104 insertions(+), 44 deletions(-) diff --git a/cache.h b/cache.h index 3a8e1258e..7c42ecaff 100644 --- a/cache.h +++ b/cache.h @@ -928,6 +928,22 @@ extern const char *fmt_name(const char *name, const char *email); extern const char *git_editor(void); extern const char *git_pager(int stdout_is_tty); +struct ident_split { + const char *name_begin; + const char *name_end; + const char *mail_begin; + const char *mail_end; + const char *date_begin; + const char *date_end; + const char *tz_begin; + const char *tz_end; +}; +/* + * Signals an success with 0, but time part of the result may be NULL + * if the input lacks timestamp and zone + */ +extern int split_ident_line(struct ident_split *, const char *, int); + struct checkout { const char *base_dir; int base_dir_len; diff --git a/ident.c b/ident.c index f619619b8..87c697c2b 100644 --- a/ident.c +++ b/ident.c @@ -220,6 +220,74 @@ static int copy(char *buf, size_t size, int offset, const char *src) return offset; } +/* + * Reverse of fmt_ident(); given an ident line, split the fields + * to allow the caller to parse it. + * Signal a success by returning 0, but date/tz fields of the result + * can still be NULL if the input line only has the name/email part + * (e.g. reading from a reflog entry). + */ +int split_ident_line(struct ident_split *split, const char *line, int len) +{ + const char *cp; + size_t span; + int status = -1; + + memset(split, 0, sizeof(*split)); + + split->name_begin = line; + for (cp = line; *cp && cp < line + len; cp++) + if (*cp == '<') { + split->mail_begin = cp + 1; + break; + } + if (!split->mail_begin) + return status; + + for (cp = split->mail_begin - 2; line < cp; cp--) + if (!isspace(*cp)) { + split->name_end = cp + 1; + break; + } + if (!split->name_end) + return status; + + for (cp = split->mail_begin; cp < line + len; cp++) + if (*cp == '>') { + split->mail_end = cp; + break; + } + if (!split->mail_end) + return status; + + for (cp = split->mail_end + 1; cp < line + len && isspace(*cp); cp++) + ; + if (line + len <= cp) + goto person_only; + split->date_begin = cp; + span = strspn(cp, "0123456789"); + if (!span) + goto person_only; + split->date_end = split->date_begin + span; + for (cp = split->date_end; cp < line + len && isspace(*cp); cp++) + ; + if (line + len <= cp || (*cp != '+' && *cp != '-')) + goto person_only; + split->tz_begin = cp; + span = strspn(cp + 1, "0123456789"); + if (!span) + goto person_only; + split->tz_end = split->tz_begin + 1 + span; + return 0; + +person_only: + split->date_begin = NULL; + split->date_end = NULL; + split->tz_begin = NULL; + split->tz_end = NULL; + return 0; +} + static const char *env_hint = "\n" "*** Please tell me who you are.\n" diff --git a/pretty.c b/pretty.c index 8688b8f2d..f2dee308b 100644 --- a/pretty.c +++ b/pretty.c @@ -531,41 +531,24 @@ static size_t format_person_part(struct strbuf *sb, char part, { /* currently all placeholders have same length */ const int placeholder_len = 2; - int start, end, tz = 0; + int tz; unsigned long date = 0; - char *ep; - const char *name_start, *name_end, *mail_start, *mail_end, *msg_end = msg+len; char person_name[1024]; char person_mail[1024]; + struct ident_split s; + const char *name_start, *name_end, *mail_start, *mail_end; - /* advance 'end' to point to email start delimiter */ - for (end = 0; end < len && msg[end] != '<'; end++) - ; /* do nothing */ - - /* - * When end points at the '<' that we found, it should have - * matching '>' later, which means 'end' must be strictly - * below len - 1. - */ - if (end >= len - 2) + if (split_ident_line(&s, msg, len) < 0) goto skip; - /* Seek for both name and email part */ - name_start = msg; - name_end = msg+end; - while (name_end > name_start && isspace(*(name_end-1))) - name_end--; - mail_start = msg+end+1; - mail_end = mail_start; - while (mail_end < msg_end && *mail_end != '>') - mail_end++; - if (mail_end == msg_end) - goto skip; - end = mail_end-msg; + name_start = s.name_begin; + name_end = s.name_end; + mail_start = s.mail_begin; + mail_end = s.mail_end; if (part == 'N' || part == 'E') { /* mailmap lookup */ - strlcpy(person_name, name_start, name_end-name_start+1); - strlcpy(person_mail, mail_start, mail_end-mail_start+1); + strlcpy(person_name, name_start, name_end - name_start + 1); + strlcpy(person_mail, mail_start, mail_end - mail_start + 1); mailmap_name(person_mail, sizeof(person_mail), person_name, sizeof(person_name)); name_start = person_name; name_end = name_start + strlen(person_name); @@ -581,28 +564,20 @@ static size_t format_person_part(struct strbuf *sb, char part, return placeholder_len; } - /* advance 'start' to point to date start delimiter */ - for (start = end + 1; start < len && isspace(msg[start]); start++) - ; /* do nothing */ - if (start >= len) - goto skip; - date = strtoul(msg + start, &ep, 10); - if (msg + start == ep) + if (!s.date_begin) goto skip; + date = strtoul(s.date_begin, NULL, 10); + if (part == 't') { /* date, UNIX timestamp */ - strbuf_add(sb, msg + start, ep - (msg + start)); + strbuf_add(sb, s.date_begin, s.date_end - s.date_begin); return placeholder_len; } /* parse tz */ - for (start = ep - msg + 1; start < len && isspace(msg[start]); start++) - ; /* do nothing */ - if (start + 1 < len) { - tz = strtoul(msg + start + 1, NULL, 10); - if (msg[start] == '-') - tz = -tz; - } + tz = strtoul(s.tz_begin + 1, NULL, 10); + if (*s.tz_begin == '-') + tz = -tz; switch (part) { case 'd': /* date */ @@ -621,8 +596,9 @@ static size_t format_person_part(struct strbuf *sb, char part, skip: /* - * bogus commit, 'sb' cannot be updated, but we still need to - * compute a valid return value. + * reading from either a bogus commit, or a reflog entry with + * %gn, %ge, etc.; 'sb' cannot be updated, but we still need + * to compute a valid return value. */ if (part == 'n' || part == 'e' || part == 't' || part == 'd' || part == 'D' || part == 'r' || part == 'i') From 04861982e553289c923b2f9ef829ef33206c9bc4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 11 Mar 2012 03:51:32 -0700 Subject: [PATCH 02/32] t7503: does pre-commit-hook learn authorship? When "--author" option is used to lie the authorship to "git commit" command, hooks should learn the author name and email just like when GIT_AUTHOR_NAME and GIT_AUTHOR_EMAIL environment variables are used to lie the authorship. Test this. Signed-off-by: Junio C Hamano --- t/t7503-pre-commit-hook.sh | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/t/t7503-pre-commit-hook.sh b/t/t7503-pre-commit-hook.sh index ee7f0cd45..aa294ea42 100755 --- a/t/t7503-pre-commit-hook.sh +++ b/t/t7503-pre-commit-hook.sh @@ -118,4 +118,22 @@ test_expect_success 'with failing hook requiring GIT_PREFIX' ' git checkout -- file ' +test_expect_failure 'check the author in hook' ' + write_script "$HOOK" <<-\EOF && + test "$GIT_AUTHOR_NAME" = "New Author" && + test "$GIT_AUTHOR_EMAIL" = "newauthor@example.com" + EOF + test_must_fail git commit --allow-empty -m "by a.u.thor" && + ( + GIT_AUTHOR_NAME="New Author" && + GIT_AUTHOR_EMAIL="newauthor@example.com" && + export GIT_AUTHOR_NAME GIT_AUTHOR_EMAIL && + git commit --allow-empty -m "by new.author via env" && + git show -s + ) && + git commit --author="New Author " \ + --allow-empty -m "by new.author via command line" && + git show -s +' + test_done From 7dfe8ad6006c105989e4e8cfb196aa4ecda3c21d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 11 Mar 2012 03:12:10 -0700 Subject: [PATCH 03/32] commit: pass author/committer info to hooks When lying the author name via GIT_AUTHOR_NAME environment variable to "git commit", the hooks run by the command saw it and could act on the name that will be recorded in the final commit. When the user uses the "--author" option from the command line, the command should give the same information to the hook, and back when "git command" was a scripted Porcelain, it did set the environment variable and hooks can learn the author name from it. However, when the command was reimplemented in C, the rewritten code was not very faithful to the original, and hooks stopped getting the authorship information given with "--author". Fix this by exporting the necessary environment variables. Signed-off-by: Junio C Hamano --- builtin/commit.c | 22 +++++++++++++++++++--- t/t7503-pre-commit-hook.sh | 2 +- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index eae5a29ae..57a60f9f4 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -533,9 +533,20 @@ static int is_a_merge(const struct commit *current_head) static const char sign_off_header[] = "Signed-off-by: "; +static void export_one(const char *var, const char *s, const char *e, int hack) +{ + struct strbuf buf = STRBUF_INIT; + if (hack) + strbuf_addch(&buf, hack); + strbuf_addf(&buf, "%.*s", (int)(e - s), s); + setenv(var, buf.buf, 1); + strbuf_release(&buf); +} + static void determine_author_info(struct strbuf *author_ident) { char *name, *email, *date; + struct ident_split author; name = getenv("GIT_AUTHOR_NAME"); email = getenv("GIT_AUTHOR_EMAIL"); @@ -585,6 +596,11 @@ static void determine_author_info(struct strbuf *author_ident) date = force_date; strbuf_addstr(author_ident, fmt_ident(name, email, date, IDENT_ERROR_ON_NO_NAME)); + if (!split_ident_line(&author, author_ident->buf, author_ident->len)) { + export_one("GIT_AUTHOR_NAME", author.name_begin, author.name_end, 0); + export_one("GIT_AUTHOR_EMAIL", author.mail_begin, author.mail_end, 0); + export_one("GIT_AUTHOR_DATE", author.date_begin, author.tz_end, '@'); + } } static int ends_rfc2822_footer(struct strbuf *sb) @@ -652,6 +668,9 @@ static int prepare_to_commit(const char *index_file, const char *prefix, int ident_shown = 0; int clean_message_contents = (cleanup_mode != CLEANUP_NONE); + /* This checks and barfs if author is badly specified */ + determine_author_info(author_ident); + if (!no_verify && run_hook(index_file, "pre-commit", NULL)) return 0; @@ -771,9 +790,6 @@ static int prepare_to_commit(const char *index_file, const char *prefix, strbuf_release(&sb); - /* This checks and barfs if author is badly specified */ - determine_author_info(author_ident); - /* This checks if committer ident is explicitly given */ strbuf_addstr(&committer_ident, git_committer_info(0)); if (use_editor && include_status) { diff --git a/t/t7503-pre-commit-hook.sh b/t/t7503-pre-commit-hook.sh index aa294ea42..984889b39 100755 --- a/t/t7503-pre-commit-hook.sh +++ b/t/t7503-pre-commit-hook.sh @@ -118,7 +118,7 @@ test_expect_success 'with failing hook requiring GIT_PREFIX' ' git checkout -- file ' -test_expect_failure 'check the author in hook' ' +test_expect_success 'check the author in hook' ' write_script "$HOOK" <<-\EOF && test "$GIT_AUTHOR_NAME" = "New Author" && test "$GIT_AUTHOR_EMAIL" = "newauthor@example.com" From 62d39359af1a46de68b600c0dc73a98b78aa5523 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 14 Mar 2012 20:50:21 +0100 Subject: [PATCH 04/32] t4034: diff.*.wordregex should not be "sticky" in --word-diff The test case applies a custom wordRegex to one file in a diff, and expects that the default word splitting applies to the second file in the diff. But the custom wordRegex is also incorrectly used for the second file. Helped-by: Thomas Rast Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- t/t4034-diff-words.sh | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index 5c2012111..310ace1b5 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -3,6 +3,7 @@ test_description='word diff colors' . ./test-lib.sh +. "$TEST_DIRECTORY"/diff-lib.sh cat >pre.simple <<-\EOF h(4) @@ -293,6 +294,10 @@ test_expect_success '--word-diff=none' ' word_diff --word-diff=plain --word-diff=none ' +test_expect_success 'unset default driver' ' + test_unconfig diff.wordregex +' + test_language_driver bibtex test_language_driver cpp test_language_driver csharp @@ -348,4 +353,35 @@ test_expect_success 'word-diff with no newline at EOF' ' word_diff --word-diff=plain ' +test_expect_success 'setup history with two files' ' + echo "a b; c" >a.tex && + echo "a b; c" >z.txt && + git add a.tex z.txt && + git commit -minitial && + + # modify both + echo "a bx; c" >a.tex && + echo "a bx; c" >z.txt && + git commit -mmodified -a +' + +test_expect_failure 'wordRegex for the first file does not apply to the second' ' + echo "*.tex diff=tex" >.gitattributes && + git config diff.tex.wordRegex "[a-z]+|." && + cat >expect <<-\EOF && + diff --git a/a.tex b/a.tex + --- a/a.tex + +++ b/a.tex + @@ -1 +1 @@ + a [-b-]{+bx+}; c + diff --git a/z.txt b/z.txt + --- a/z.txt + +++ b/z.txt + @@ -1 +1 @@ + a [-b;-]{+bx;+} c + EOF + git diff --word-diff HEAD~ >actual && + compare_diff_patch expect actual +' + test_done From 77d1a520fb5b8ad8cc86228023f16a44b75c05d1 Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Wed, 14 Mar 2012 19:24:08 +0100 Subject: [PATCH 05/32] diff: refactor the word-diff setup from builtin_diff_cmd Quite a chunk of builtin_diff_cmd deals with word-diff setup, defaults and such. This makes the function a bit hard to read, but is also asymmetric because the corresponding teardown lives in free_diff_words_data already. Refactor into a new function init_diff_words_data. For simplicity, also shuffle around some functions it depends on. Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- diff.c | 119 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 63 insertions(+), 56 deletions(-) diff --git a/diff.c b/diff.c index 5388ded21..526f19805 100644 --- a/diff.c +++ b/diff.c @@ -987,6 +987,67 @@ static void diff_words_flush(struct emit_callback *ecbdata) diff_words_show(ecbdata->diff_words); } +static void diff_filespec_load_driver(struct diff_filespec *one) +{ + /* Use already-loaded driver */ + if (one->driver) + return; + + if (S_ISREG(one->mode)) + one->driver = userdiff_find_by_path(one->path); + + /* Fallback to default settings */ + if (!one->driver) + one->driver = userdiff_find_by_name("default"); +} + +static const char *userdiff_word_regex(struct diff_filespec *one) +{ + diff_filespec_load_driver(one); + return one->driver->word_regex; +} + +static void init_diff_words_data(struct emit_callback *ecbdata, + struct diff_options *o, + struct diff_filespec *one, + struct diff_filespec *two) +{ + int i; + + ecbdata->diff_words = + xcalloc(1, sizeof(struct diff_words_data)); + ecbdata->diff_words->type = o->word_diff; + ecbdata->diff_words->opt = o; + if (!o->word_regex) + o->word_regex = userdiff_word_regex(one); + if (!o->word_regex) + o->word_regex = userdiff_word_regex(two); + if (!o->word_regex) + o->word_regex = diff_word_regex_cfg; + if (o->word_regex) { + ecbdata->diff_words->word_regex = (regex_t *) + xmalloc(sizeof(regex_t)); + if (regcomp(ecbdata->diff_words->word_regex, + o->word_regex, + REG_EXTENDED | REG_NEWLINE)) + die ("Invalid regular expression: %s", + o->word_regex); + } + for (i = 0; i < ARRAY_SIZE(diff_words_styles); i++) { + if (o->word_diff == diff_words_styles[i].type) { + ecbdata->diff_words->style = + &diff_words_styles[i]; + break; + } + } + if (want_color(o->use_color)) { + struct diff_words_style *st = ecbdata->diff_words->style; + st->old.color = diff_get_color_opt(o, DIFF_FILE_OLD); + st->new.color = diff_get_color_opt(o, DIFF_FILE_NEW); + st->ctx.color = diff_get_color_opt(o, DIFF_PLAIN); + } +} + static void free_diff_words_data(struct emit_callback *ecbdata) { if (ecbdata->diff_words) { @@ -2016,20 +2077,6 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two, char *pre emit_binary_diff_body(file, two, one, prefix); } -static void diff_filespec_load_driver(struct diff_filespec *one) -{ - /* Use already-loaded driver */ - if (one->driver) - return; - - if (S_ISREG(one->mode)) - one->driver = userdiff_find_by_path(one->path); - - /* Fallback to default settings */ - if (!one->driver) - one->driver = userdiff_find_by_name("default"); -} - int diff_filespec_is_binary(struct diff_filespec *one) { if (one->is_binary == -1) { @@ -2055,12 +2102,6 @@ static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespe return one->driver->funcname.pattern ? &one->driver->funcname : NULL; } -static const char *userdiff_word_regex(struct diff_filespec *one) -{ - diff_filespec_load_driver(one); - return one->driver->word_regex; -} - void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b) { if (!options->a_prefix) @@ -2247,42 +2288,8 @@ static void builtin_diff(const char *name_a, xecfg.ctxlen = strtoul(diffopts + 10, NULL, 10); else if (!prefixcmp(diffopts, "-u")) xecfg.ctxlen = strtoul(diffopts + 2, NULL, 10); - if (o->word_diff) { - int i; - - ecbdata.diff_words = - xcalloc(1, sizeof(struct diff_words_data)); - ecbdata.diff_words->type = o->word_diff; - ecbdata.diff_words->opt = o; - if (!o->word_regex) - o->word_regex = userdiff_word_regex(one); - if (!o->word_regex) - o->word_regex = userdiff_word_regex(two); - if (!o->word_regex) - o->word_regex = diff_word_regex_cfg; - if (o->word_regex) { - ecbdata.diff_words->word_regex = (regex_t *) - xmalloc(sizeof(regex_t)); - if (regcomp(ecbdata.diff_words->word_regex, - o->word_regex, - REG_EXTENDED | REG_NEWLINE)) - die ("Invalid regular expression: %s", - o->word_regex); - } - for (i = 0; i < ARRAY_SIZE(diff_words_styles); i++) { - if (o->word_diff == diff_words_styles[i].type) { - ecbdata.diff_words->style = - &diff_words_styles[i]; - break; - } - } - if (want_color(o->use_color)) { - struct diff_words_style *st = ecbdata.diff_words->style; - st->old.color = diff_get_color_opt(o, DIFF_FILE_OLD); - st->new.color = diff_get_color_opt(o, DIFF_FILE_NEW); - st->ctx.color = diff_get_color_opt(o, DIFF_PLAIN); - } - } + if (o->word_diff) + init_diff_words_data(&ecbdata, o, one, two); xdi_diff_outf(&mf1, &mf2, fn_out_consume, &ecbdata, &xpp, &xecfg); if (o->word_diff) From 6440d3417c1d51a20014d4b6fc6c59bacfa87dab Mon Sep 17 00:00:00 2001 From: Thomas Rast Date: Wed, 14 Mar 2012 19:24:09 +0100 Subject: [PATCH 06/32] diff: tweak a _copy_ of diff_options with word-diff When using word diff, the code sets the word_regex from various defaults if it was not set already. The problem is that it does this on the original diff_options, which will also be used in subsequent diffs. This means that when the word_regex is not given on the command line, only the first diff for which a setting for word_regex (either from attributes or diff.wordRegex) ever takes effect. This value then propagates to the rest of the diff runs and in particular prevents further attribute lookups. Fix the problem of changing diff state once and for all, by working with a _copy_ of the diff_options. Noticed-by: Johannes Sixt Signed-off-by: Thomas Rast Signed-off-by: Junio C Hamano --- diff.c | 5 ++++- t/t4034-diff-words.sh | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/diff.c b/diff.c index 526f19805..349a61d58 100644 --- a/diff.c +++ b/diff.c @@ -1008,11 +1008,13 @@ static const char *userdiff_word_regex(struct diff_filespec *one) } static void init_diff_words_data(struct emit_callback *ecbdata, - struct diff_options *o, + struct diff_options *orig_opts, struct diff_filespec *one, struct diff_filespec *two) { int i; + struct diff_options *o = xmalloc(sizeof(struct diff_options)); + memcpy(o, orig_opts, sizeof(struct diff_options)); ecbdata->diff_words = xcalloc(1, sizeof(struct diff_words_data)); @@ -1052,6 +1054,7 @@ static void free_diff_words_data(struct emit_callback *ecbdata) { if (ecbdata->diff_words) { diff_words_flush(ecbdata); + free (ecbdata->diff_words->opt); free (ecbdata->diff_words->minus.text.ptr); free (ecbdata->diff_words->minus.orig); free (ecbdata->diff_words->plus.text.ptr); diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh index 310ace1b5..30d42cb3b 100755 --- a/t/t4034-diff-words.sh +++ b/t/t4034-diff-words.sh @@ -365,7 +365,7 @@ test_expect_success 'setup history with two files' ' git commit -mmodified -a ' -test_expect_failure 'wordRegex for the first file does not apply to the second' ' +test_expect_success 'wordRegex for the first file does not apply to the second' ' echo "*.tex diff=tex" >.gitattributes && git config diff.tex.wordRegex "[a-z]+|." && cat >expect <<-\EOF && From c844a8035617a602015d74ce329ee88f9b003bf9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 15 Mar 2012 15:58:54 +0100 Subject: [PATCH 07/32] remove_dir_recursively(): Add flag for skipping removal of toplevel dir Add the REMOVE_DIR_KEEP_TOPLEVEL flag to remove_dir_recursively() for deleting everything inside the given directory, but _not_ the given directory itself. Note that this does not pass the REMOVE_DIR_KEEP_NESTED_GIT flag, if set, to the recursive invocations of remove_dir_recursively(). It is likely to be a a bug that has been present since REMOVE_DIR_KEEP_NESTED_GIT was introduced (a0f4afb), but this commit keeps the same behaviour for now. Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- dir.c | 14 ++++++++++---- dir.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/dir.c b/dir.c index 0a78d00b5..2087d23b6 100644 --- a/dir.c +++ b/dir.c @@ -1178,6 +1178,7 @@ int remove_dir_recursively(struct strbuf *path, int flag) struct dirent *e; int ret = 0, original_len = path->len, len; int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY); + int keep_toplevel = (flag & REMOVE_DIR_KEEP_TOPLEVEL); unsigned char submodule_head[20]; if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) && @@ -1185,9 +1186,14 @@ int remove_dir_recursively(struct strbuf *path, int flag) /* Do not descend and nuke a nested git work tree. */ return 0; + flag &= ~(REMOVE_DIR_KEEP_TOPLEVEL|REMOVE_DIR_KEEP_NESTED_GIT); dir = opendir(path->buf); - if (!dir) - return rmdir(path->buf); + if (!dir) { + if (!keep_toplevel) + return rmdir(path->buf); + else + return -1; + } if (path->buf[original_len - 1] != '/') strbuf_addch(path, '/'); @@ -1202,7 +1208,7 @@ int remove_dir_recursively(struct strbuf *path, int flag) if (lstat(path->buf, &st)) ; /* fall thru */ else if (S_ISDIR(st.st_mode)) { - if (!remove_dir_recursively(path, only_empty)) + if (!remove_dir_recursively(path, flag)) continue; /* happy */ } else if (!only_empty && !unlink(path->buf)) continue; /* happy, too */ @@ -1214,7 +1220,7 @@ int remove_dir_recursively(struct strbuf *path, int flag) closedir(dir); strbuf_setlen(path, original_len); - if (!ret) + if (!ret && !keep_toplevel) ret = rmdir(path->buf); return ret; } diff --git a/dir.h b/dir.h index dd6947e1d..58b6fc7c8 100644 --- a/dir.h +++ b/dir.h @@ -102,6 +102,7 @@ extern void setup_standard_excludes(struct dir_struct *dir); #define REMOVE_DIR_EMPTY_ONLY 01 #define REMOVE_DIR_KEEP_NESTED_GIT 02 +#define REMOVE_DIR_KEEP_TOPLEVEL 04 extern int remove_dir_recursively(struct strbuf *path, int flag); /* tries to remove the path with empty directories along it, ignores ENOENT */ From 01bfec8e52dcfa2da47b54b3c89c3181ae09b9a9 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Mon, 12 Mar 2012 15:57:12 +0100 Subject: [PATCH 08/32] t3310: illustrate failure to "notes merge --commit" inside $GIT_DIR/ The 'git notes merge' command expected to be run from the working tree of the project being annotated, and did not anticipate getting run inside $GIT_DIR/. However, because we use $GIT_DIR/NOTES_MERGE_WORKTREE as a temporary working space for the user to work on resolving conflicts, it is not unreasonable for a user to run "git notes merge --commit" there. But the command fails to do so. Found-by: David Bremner Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- t/t3310-notes-merge-manual-resolve.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/t/t3310-notes-merge-manual-resolve.sh b/t/t3310-notes-merge-manual-resolve.sh index 436719795..0c531c379 100755 --- a/t/t3310-notes-merge-manual-resolve.sh +++ b/t/t3310-notes-merge-manual-resolve.sh @@ -553,4 +553,23 @@ test_expect_success 'resolve situation by aborting the notes merge' ' verify_notes z ' +cat >expect_notes < $(git rev-parse HEAD) && + echo "bar" >> $(git rev-parse HEAD) && + git notes merge --commit + ) && + git notes show HEAD > actual_notes && + test_cmp expect_notes actual_notes +' + test_done From a0be62c100897573ef1575ec0d5e8b215e9dcafe Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Mon, 12 Mar 2012 15:57:13 +0100 Subject: [PATCH 09/32] notes-merge: use opendir/readdir instead of using read_directory() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit notes_merge_commit() only needs to list all entries (non-recursively) under a directory, which can be easily accomplished with opendir/readdir and would be more lightweight than read_directory(). read_directory() is designed to list paths inside a working directory. Using it outside of its scope may lead to undesired effects. Apparently, one of the undesired effects of read_directory() is that it doesn't deal with being given absolute paths. This creates problems for notes_merge_commit() when git_path() returns an absolute path, which happens when the current working directory is in a subdirectory of the .git directory. Originally-by: Nguyễn Thái Ngọc Duy Updated-by: Johan Herland Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes-merge.c | 50 ++++++++++++++++----------- t/t3310-notes-merge-manual-resolve.sh | 2 +- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/notes-merge.c b/notes-merge.c index fb0832f97..3a16af281 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -687,51 +687,60 @@ int notes_merge_commit(struct notes_merge_options *o, { /* * Iterate through files in .git/NOTES_MERGE_WORKTREE and add all - * found notes to 'partial_tree'. Write the updates notes tree to + * found notes to 'partial_tree'. Write the updated notes tree to * the DB, and commit the resulting tree object while reusing the * commit message and parents from 'partial_commit'. * Finally store the new commit object SHA1 into 'result_sha1'. */ - struct dir_struct dir; - char *path = xstrdup(git_path(NOTES_MERGE_WORKTREE "/")); - int path_len = strlen(path), i; + DIR *dir; + struct dirent *e; + struct strbuf path = STRBUF_INIT; char *msg = strstr(partial_commit->buffer, "\n\n"); struct strbuf sb_msg = STRBUF_INIT; + int baselen; + strbuf_addstr(&path, git_path(NOTES_MERGE_WORKTREE)); if (o->verbosity >= 3) - printf("Committing notes in notes merge worktree at %.*s\n", - path_len - 1, path); + printf("Committing notes in notes merge worktree at %s\n", + path.buf); if (!msg || msg[2] == '\0') die("partial notes commit has empty message"); msg += 2; - memset(&dir, 0, sizeof(dir)); - read_directory(&dir, path, path_len, NULL); - for (i = 0; i < dir.nr; i++) { - struct dir_entry *ent = dir.entries[i]; + dir = opendir(path.buf); + if (!dir) + die_errno("could not open %s", path.buf); + + strbuf_addch(&path, '/'); + baselen = path.len; + while ((e = readdir(dir)) != NULL) { struct stat st; - const char *relpath = ent->name + path_len; unsigned char obj_sha1[20], blob_sha1[20]; - if (ent->len - path_len != 40 || get_sha1_hex(relpath, obj_sha1)) { + if (is_dot_or_dotdot(e->d_name)) + continue; + + if (strlen(e->d_name) != 40 || get_sha1_hex(e->d_name, obj_sha1)) { if (o->verbosity >= 3) - printf("Skipping non-SHA1 entry '%s'\n", - ent->name); + printf("Skipping non-SHA1 entry '%s%s'\n", + path.buf, e->d_name); continue; } + strbuf_addstr(&path, e->d_name); /* write file as blob, and add to partial_tree */ - if (stat(ent->name, &st)) - die_errno("Failed to stat '%s'", ent->name); - if (index_path(blob_sha1, ent->name, &st, HASH_WRITE_OBJECT)) - die("Failed to write blob object from '%s'", ent->name); + if (stat(path.buf, &st)) + die_errno("Failed to stat '%s'", path.buf); + if (index_path(blob_sha1, path.buf, &st, HASH_WRITE_OBJECT)) + die("Failed to write blob object from '%s'", path.buf); if (add_note(partial_tree, obj_sha1, blob_sha1, NULL)) die("Failed to add resolved note '%s' to notes tree", - ent->name); + path.buf); if (o->verbosity >= 4) printf("Added resolved note for object %s: %s\n", sha1_to_hex(obj_sha1), sha1_to_hex(blob_sha1)); + strbuf_setlen(&path, baselen); } strbuf_attach(&sb_msg, msg, strlen(msg), strlen(msg) + 1); @@ -740,7 +749,8 @@ int notes_merge_commit(struct notes_merge_options *o, if (o->verbosity >= 4) printf("Finalized notes merge commit: %s\n", sha1_to_hex(result_sha1)); - free(path); + strbuf_release(&path); + closedir(dir); return 0; } diff --git a/t/t3310-notes-merge-manual-resolve.sh b/t/t3310-notes-merge-manual-resolve.sh index 0c531c379..d6d6ac608 100755 --- a/t/t3310-notes-merge-manual-resolve.sh +++ b/t/t3310-notes-merge-manual-resolve.sh @@ -558,7 +558,7 @@ foo bar EOF -test_expect_failure 'switch cwd before committing notes merge' ' +test_expect_success 'switch cwd before committing notes merge' ' git notes add -m foo HEAD && git notes --ref=other add -m bar HEAD && test_must_fail git notes merge refs/notes/other && From dabba590aa0e94a8deb6f0e827144697895bcfe8 Mon Sep 17 00:00:00 2001 From: Johan Herland Date: Thu, 15 Mar 2012 15:58:56 +0100 Subject: [PATCH 10/32] notes-merge: Don't remove .git/NOTES_MERGE_WORKTREE; it may be the user's cwd When a manual notes merge is committed or aborted, we need to remove the temporary worktree at .git/NOTES_MERGE_WORKTREE. However, removing the entire directory is not good if the user ran the 'git notes merge --commit/--abort' from within that directory. On Windows, the directory removal would simply fail, while on POSIX systems, users would suddenly find themselves in an invalid current directory. Therefore, instead of deleting the entire directory, we delete everything _within_ the directory, and leave the (empty) directory in place. This would cause a subsequent notes merge to abort, complaining about a previous - unfinished - notes merge (due to the presence of .git/NOTES_MERGE_WORKTREE), so we also need to adjust this check to only trigger when .git/NOTES_MERGE_WORKTREE is non-empty. Finally, adjust the t3310 manual notes merge testcases to correctly handle the existence of an empty .git/NOTES_MERGE_WORKTREE directory. Inspired-by: Junio C Hamano Signed-off-by: Johan Herland Signed-off-by: Junio C Hamano --- notes-merge.c | 13 +++++++++---- t/t3310-notes-merge-manual-resolve.sh | 8 ++++---- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/notes-merge.c b/notes-merge.c index 3a16af281..74aa77ce4 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -267,7 +267,8 @@ static void check_notes_merge_worktree(struct notes_merge_options *o) * Must establish NOTES_MERGE_WORKTREE. * Abort if NOTES_MERGE_WORKTREE already exists */ - if (file_exists(git_path(NOTES_MERGE_WORKTREE))) { + if (file_exists(git_path(NOTES_MERGE_WORKTREE)) && + !is_empty_dir(git_path(NOTES_MERGE_WORKTREE))) { if (advice_resolve_conflict) die("You have not concluded your previous " "notes merge (%s exists).\nPlease, use " @@ -756,14 +757,18 @@ int notes_merge_commit(struct notes_merge_options *o, int notes_merge_abort(struct notes_merge_options *o) { - /* Remove .git/NOTES_MERGE_WORKTREE directory and all files within */ + /* + * Remove all files within .git/NOTES_MERGE_WORKTREE. We do not remove + * the .git/NOTES_MERGE_WORKTREE directory itself, since it might be + * the current working directory of the user. + */ struct strbuf buf = STRBUF_INIT; int ret; strbuf_addstr(&buf, git_path(NOTES_MERGE_WORKTREE)); if (o->verbosity >= 3) - printf("Removing notes merge worktree at %s\n", buf.buf); - ret = remove_dir_recursively(&buf, 0); + printf("Removing notes merge worktree at %s/*\n", buf.buf); + ret = remove_dir_recursively(&buf, REMOVE_DIR_KEEP_TOPLEVEL); strbuf_release(&buf); return ret; } diff --git a/t/t3310-notes-merge-manual-resolve.sh b/t/t3310-notes-merge-manual-resolve.sh index d6d6ac608..195bb97f8 100755 --- a/t/t3310-notes-merge-manual-resolve.sh +++ b/t/t3310-notes-merge-manual-resolve.sh @@ -324,7 +324,7 @@ y and z notes on 4th commit EOF git notes merge --commit && # No .git/NOTES_MERGE_* files left - test_must_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && + test_might_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && test_cmp /dev/null output && # Merge commit has pre-merge y and pre-merge z as parents test "$(git rev-parse refs/notes/m^1)" = "$(cat pre_merge_y)" && @@ -386,7 +386,7 @@ test_expect_success 'redo merge of z into m (== y) with default ("manual") resol test_expect_success 'abort notes merge' ' git notes merge --abort && # No .git/NOTES_MERGE_* files left - test_must_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && + test_might_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && test_cmp /dev/null output && # m has not moved (still == y) test "$(git rev-parse refs/notes/m)" = "$(cat pre_merge_y)" && @@ -453,7 +453,7 @@ EOF # Finalize merge git notes merge --commit && # No .git/NOTES_MERGE_* files left - test_must_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && + test_might_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && test_cmp /dev/null output && # Merge commit has pre-merge y and pre-merge z as parents test "$(git rev-parse refs/notes/m^1)" = "$(cat pre_merge_y)" && @@ -542,7 +542,7 @@ EOF test_expect_success 'resolve situation by aborting the notes merge' ' git notes merge --abort && # No .git/NOTES_MERGE_* files left - test_must_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && + test_might_fail ls .git/NOTES_MERGE_* >output 2>/dev/null && test_cmp /dev/null output && # m has not moved (still == w) test "$(git rev-parse refs/notes/m)" = "$(git rev-parse refs/notes/w)" && From ae2f203ef7be7b33ff5ec61646c8662363eccd62 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 15 Mar 2012 01:04:12 -0700 Subject: [PATCH 11/32] clean: preserve nested git worktree in subdirectories remove_dir_recursively() has a check to avoid removing the directory it was asked to remove without recursing into it and report success when the directory is the top level of a working tree of a nested git repository, to protect such a repository from "clean -f" (without double -f). If a working tree of a nested git repository is in a subdirectory of a toplevel project, however, this protection did not apply by mistake; we forgot to pass the REMOVE_DIR_KEEP_NESTED_GIT down to the recursive removal codepath. This requires us to also teach the higher level not to remove the directory it is asked to remove, when the recursed invocation did not remove the directory it was asked to remove due to a nested git repository, as it is not an error to leave the parent directories of such a nested repository. Signed-off-by: Junio C Hamano --- dir.c | 27 +++++++++++++++++++++------ t/t7300-clean.sh | 27 ++++++++++++++++++++++----- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/dir.c b/dir.c index 2087d23b6..e98760c72 100644 --- a/dir.c +++ b/dir.c @@ -1172,23 +1172,27 @@ int is_empty_dir(const char *path) return ret; } -int remove_dir_recursively(struct strbuf *path, int flag) +static int remove_dir_recurse(struct strbuf *path, int flag, int *kept_up) { DIR *dir; struct dirent *e; - int ret = 0, original_len = path->len, len; + int ret = 0, original_len = path->len, len, kept_down = 0; int only_empty = (flag & REMOVE_DIR_EMPTY_ONLY); int keep_toplevel = (flag & REMOVE_DIR_KEEP_TOPLEVEL); unsigned char submodule_head[20]; if ((flag & REMOVE_DIR_KEEP_NESTED_GIT) && - !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) + !resolve_gitlink_ref(path->buf, "HEAD", submodule_head)) { /* Do not descend and nuke a nested git work tree. */ + if (kept_up) + *kept_up = 1; return 0; + } - flag &= ~(REMOVE_DIR_KEEP_TOPLEVEL|REMOVE_DIR_KEEP_NESTED_GIT); + flag &= ~REMOVE_DIR_KEEP_TOPLEVEL; dir = opendir(path->buf); if (!dir) { + /* an empty dir could be removed even if it is unreadble */ if (!keep_toplevel) return rmdir(path->buf); else @@ -1208,7 +1212,7 @@ int remove_dir_recursively(struct strbuf *path, int flag) if (lstat(path->buf, &st)) ; /* fall thru */ else if (S_ISDIR(st.st_mode)) { - if (!remove_dir_recursively(path, flag)) + if (!remove_dir_recurse(path, flag, &kept_down)) continue; /* happy */ } else if (!only_empty && !unlink(path->buf)) continue; /* happy, too */ @@ -1220,11 +1224,22 @@ int remove_dir_recursively(struct strbuf *path, int flag) closedir(dir); strbuf_setlen(path, original_len); - if (!ret && !keep_toplevel) + if (!ret && !keep_toplevel && !kept_down) ret = rmdir(path->buf); + else if (kept_up) + /* + * report the uplevel that it is not an error that we + * did not rmdir() our directory. + */ + *kept_up = !ret; return ret; } +int remove_dir_recursively(struct strbuf *path, int flag) +{ + return remove_dir_recurse(path, flag, NULL); +} + void setup_standard_excludes(struct dir_struct *dir) { const char *path; diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 800b5368a..ccfb54de7 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -399,8 +399,8 @@ test_expect_success SANITY 'removal failure' ' ' test_expect_success 'nested git work tree' ' - rm -fr foo bar && - mkdir foo bar && + rm -fr foo bar baz && + mkdir -p foo bar baz/boo && ( cd foo && git init && @@ -412,15 +412,24 @@ test_expect_success 'nested git work tree' ' cd bar && >goodbye.people ) && + ( + cd baz/boo && + git init && + >deeper.world + git add . && + git commit -a -m deeply.nested + ) && git clean -f -d && test -f foo/.git/index && test -f foo/hello.world && + test -f baz/boo/.git/index && + test -f baz/boo/deeper.world && ! test -d bar ' test_expect_success 'force removal of nested git work tree' ' - rm -fr foo bar && - mkdir foo bar && + rm -fr foo bar baz && + mkdir -p foo bar baz/boo && ( cd foo && git init && @@ -432,9 +441,17 @@ test_expect_success 'force removal of nested git work tree' ' cd bar && >goodbye.people ) && + ( + cd baz/boo && + git init && + >deeper.world + git add . && + git commit -a -m deeply.nested + ) && git clean -f -f -d && ! test -d foo && - ! test -d bar + ! test -d bar && + ! test -d baz ' test_expect_success 'git clean -e' ' From cba595bd21fd0731e37898dbd6c5b55f65fd8aea Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 Mar 2012 14:53:24 -0400 Subject: [PATCH 12/32] drop casts from users EMPTY_TREE_SHA1_BIN This macro already evaluates to the correct type, as it casts the string literal to "unsigned char *" itself (and callers who want the literal can use the _LITERAL form). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/diff.c | 2 +- merge-recursive.c | 2 +- sequencer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/diff.c b/builtin/diff.c index 424c815f9..9069dc41b 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -327,7 +327,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix) add_head_to_pending(&rev); if (!rev.pending.nr) { struct tree *tree; - tree = lookup_tree((const unsigned char*)EMPTY_TREE_SHA1_BIN); + tree = lookup_tree(EMPTY_TREE_SHA1_BIN); add_pending_object(&rev, &tree->object, "HEAD"); } break; diff --git a/merge-recursive.c b/merge-recursive.c index 6479a60cd..318d32e22 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -1914,7 +1914,7 @@ int merge_recursive(struct merge_options *o, /* if there is no common ancestor, use an empty tree */ struct tree *tree; - tree = lookup_tree((const unsigned char *)EMPTY_TREE_SHA1_BIN); + tree = lookup_tree(EMPTY_TREE_SHA1_BIN); merged_common_ancestors = make_virtual_commit(tree, "ancestor"); } diff --git a/sequencer.c b/sequencer.c index a37846a59..4307364b2 100644 --- a/sequencer.c +++ b/sequencer.c @@ -164,7 +164,7 @@ static void write_message(struct strbuf *msgbuf, const char *filename) static struct tree *empty_tree(void) { - return lookup_tree((const unsigned char *)EMPTY_TREE_SHA1_BIN); + return lookup_tree(EMPTY_TREE_SHA1_BIN); } static int error_dirty_index(struct replay_opts *opts) From f8582cad8d11cae15e73fe5213b6180ee4b9fcce Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 Mar 2012 14:53:39 -0400 Subject: [PATCH 13/32] make is_empty_blob_sha1 available everywhere The read-cache implementation defines this static function, but it is a generally useful concept in git. Let's give the empty blob the same treatment as the empty tree, providing both hex and binary forms of the sha1. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- cache.h | 13 +++++++++++++ read-cache.c | 10 ---------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/cache.h b/cache.h index e5e1aa4e1..528057480 100644 --- a/cache.h +++ b/cache.h @@ -708,6 +708,19 @@ static inline void hashclr(unsigned char *hash) #define EMPTY_TREE_SHA1_BIN \ ((const unsigned char *) EMPTY_TREE_SHA1_BIN_LITERAL) +#define EMPTY_BLOB_SHA1_HEX \ + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391" +#define EMPTY_BLOB_SHA1_BIN_LITERAL \ + "\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b" \ + "\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91" +#define EMPTY_BLOB_SHA1_BIN \ + ((const unsigned char *) EMPTY_BLOB_SHA1_BIN_LITERAL) + +static inline int is_empty_blob_sha1(const unsigned char *sha1) +{ + return !hashcmp(sha1, EMPTY_BLOB_SHA1_BIN); +} + int git_mkstemp(char *path, size_t n, const char *template); int git_mkstemps(char *path, size_t n, const char *template, int suffix_len); diff --git a/read-cache.c b/read-cache.c index 274e54b4f..6c8f39583 100644 --- a/read-cache.c +++ b/read-cache.c @@ -157,16 +157,6 @@ static int ce_modified_check_fs(struct cache_entry *ce, struct stat *st) return 0; } -static int is_empty_blob_sha1(const unsigned char *sha1) -{ - static const unsigned char empty_blob_sha1[20] = { - 0xe6,0x9d,0xe2,0x9b,0xb2,0xd1,0xd6,0x43,0x4b,0x8b, - 0x29,0xae,0x77,0x5a,0xd8,0xc2,0xe4,0x8c,0x53,0x91 - }; - - return !hashcmp(sha1, empty_blob_sha1); -} - static int ce_match_stat_basic(struct cache_entry *ce, struct stat *st) { unsigned int changed = 0; From 90d43b07687fdc51d1f2fc14948df538dc45584b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 Mar 2012 18:52:13 -0400 Subject: [PATCH 14/32] teach diffcore-rename to optionally ignore empty content Our rename detection is a heuristic, matching pairs of removed and added files with similar or identical content. It's unlikely to be wrong when there is actual content to compare, and we already take care not to do inexact rename detection when there is not enough content to produce good results. However, we always do exact rename detection, even when the blob is tiny or empty. It's easy to get false positives with an empty blob, simply because it is an obvious content to use as a boilerplate (e.g., when telling git that an empty directory is worth tracking via an empty .gitignore). This patch lets callers specify whether or not they are interested in using empty files as rename sources and destinations. The default is "yes", keeping the original behavior. It works by detecting the empty-blob sha1 for rename sources and destinations. One more flexible alternative would be to allow the caller to specify a minimum size for a blob to be "interesting" for rename detection. But that would catch small boilerplate files, not large ones (e.g., if you had the GPL COPYING file in many directories). A better alternative would be to allow a "-rename" gitattribute to allow boilerplate files to be marked as such. I'll leave the complexity of that solution until such time as somebody actually wants it. The complaints we've seen so far revolve around empty files, so let's start with the simple thing. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- diff.c | 5 +++++ diff.h | 2 +- diffcore-rename.c | 6 ++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 377ec1ea4..0b70aadc4 100644 --- a/diff.c +++ b/diff.c @@ -3136,6 +3136,7 @@ void diff_setup(struct diff_options *options) options->rename_limit = -1; options->dirstat_permille = diff_dirstat_permille_default; options->context = 3; + DIFF_OPT_SET(options, RENAME_EMPTY); options->change = diff_change; options->add_remove = diff_addremove; @@ -3506,6 +3507,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) } else if (!strcmp(arg, "--no-renames")) options->detect_rename = 0; + else if (!strcmp(arg, "--rename-empty")) + DIFF_OPT_SET(options, RENAME_EMPTY); + else if (!strcmp(arg, "--no-rename-empty")) + DIFF_OPT_CLR(options, RENAME_EMPTY); else if (!strcmp(arg, "--relative")) DIFF_OPT_SET(options, RELATIVE_NAME); else if (!prefixcmp(arg, "--relative=")) { diff --git a/diff.h b/diff.h index cb687436a..dd48eca71 100644 --- a/diff.h +++ b/diff.h @@ -60,7 +60,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data) #define DIFF_OPT_SILENT_ON_REMOVE (1 << 5) #define DIFF_OPT_FIND_COPIES_HARDER (1 << 6) #define DIFF_OPT_FOLLOW_RENAMES (1 << 7) -/* (1 << 8) unused */ +#define DIFF_OPT_RENAME_EMPTY (1 << 8) /* (1 << 9) unused */ #define DIFF_OPT_HAS_CHANGES (1 << 10) #define DIFF_OPT_QUICK (1 << 11) diff --git a/diffcore-rename.c b/diffcore-rename.c index f639601c7..216a7a4bb 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -512,9 +512,15 @@ void diffcore_rename(struct diff_options *options) else if (options->single_follow && strcmp(options->single_follow, p->two->path)) continue; /* not interested */ + else if (!DIFF_OPT_TST(options, RENAME_EMPTY) && + is_empty_blob_sha1(p->two->sha1)) + continue; else locate_rename_dst(p->two, 1); } + else if (!DIFF_OPT_TST(options, RENAME_EMPTY) && + is_empty_blob_sha1(p->one->sha1)) + continue; else if (!DIFF_PAIR_UNMERGED(p) && !DIFF_FILE_VALID(p->two)) { /* * If the source is a broken "delete", and From 4f7cb99ada26be5d86402a6e060f3ee16a672f16 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 Mar 2012 18:52:24 -0400 Subject: [PATCH 15/32] merge-recursive: don't detect renames of empty files Merge-recursive detects renames so that if one side modifies "foo" and the other side moves it to "bar", the modification is applied to "bar". However, our rename detection is based on content analysis, it can be wrong (i.e., two files were not intended as a rename, but just happen to have the same or similar content). This is quite rare if the files actually contain content, since two unrelated files are unlikely to have exactly the same content. However, empty files present a problem, in that there is nothing to analyze. An uninteresting placeholder file with zero bytes may or may not be related to a placeholder file with another name. The result is that adding content to an empty file may cause confusion if the other side of a merge removed it; your content may end up in another random placeholder file that was added. Let's err on the side of caution and not consider empty files as renames. This will cause a modify/delete conflict on the merge, which will let the user sort it out themselves. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- merge-recursive.c | 1 + t/t6022-merge-rename.sh | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/merge-recursive.c b/merge-recursive.c index 318d32e22..0fb174359 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -485,6 +485,7 @@ static struct string_list *get_renames(struct merge_options *o, renames = xcalloc(1, sizeof(struct string_list)); diff_setup(&opts); DIFF_OPT_SET(&opts, RECURSIVE); + DIFF_OPT_CLR(&opts, RENAME_EMPTY); opts.detect_rename = DIFF_DETECT_RENAME; opts.rename_limit = o->merge_rename_limit >= 0 ? o->merge_rename_limit : o->diff_rename_limit >= 0 ? o->diff_rename_limit : diff --git a/t/t6022-merge-rename.sh b/t/t6022-merge-rename.sh index 9d8584e95..110424918 100755 --- a/t/t6022-merge-rename.sh +++ b/t/t6022-merge-rename.sh @@ -884,4 +884,20 @@ test_expect_success 'no spurious "refusing to lose untracked" message' ' ! grep "refusing to lose untracked file" errors.txt ' +test_expect_success 'do not follow renames for empty files' ' + git checkout -f -b empty-base && + >empty1 && + git add empty1 && + git commit -m base && + echo content >empty1 && + git add empty1 && + git commit -m fill && + git checkout -b empty-topic HEAD^ && + git mv empty1 empty2 && + git commit -m rename && + test_must_fail git merge empty-base && + >expect && + test_cmp expect empty2 +' + test_done From e5e9b56528294e9455d22d1abb21ad32f098a1be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 24 Mar 2012 16:18:46 +0100 Subject: [PATCH 16/32] combine-diff: fix loop index underflow If both la and context are zero at the start of the loop, la wraps around and we end up reading from memory far away. Skip the loop in that case instead. Reported-by: Julia Lawall Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- combine-diff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/combine-diff.c b/combine-diff.c index 9445e86c2..45221f84a 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -414,7 +414,7 @@ static int make_hunks(struct sline *sline, unsigned long cnt, hunk_begin, j); la = (la + context < cnt + 1) ? (la + context) : cnt + 1; - while (j <= --la) { + while (la && j <= --la) { if (sline[la].flag & mark) { contin = 1; break; From c65dc351f0ae184f69cbc0b6555ff19aacb70e01 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 30 Mar 2012 11:04:08 -0700 Subject: [PATCH 17/32] t7501: test the right kind of breakage These tests try to run "git commit" with various "forbidden" combinations of options and expect the command to fail, but they do so without having any change added to the index. We wouldn't be able to catch breakages that would allow these combinations by mistake with them because the command will fail with "nothing to commit" anyway. Make sure we have something added to the index before running the command. Signed-off-by: Junio C Hamano --- t/t7501-commit.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/t7501-commit.sh b/t/t7501-commit.sh index 8bb38337a..45446b186 100755 --- a/t/t7501-commit.sh +++ b/t/t7501-commit.sh @@ -30,10 +30,12 @@ test_expect_success 'setup: initial commit' ' ' test_expect_success '-m and -F do not mix' ' + git checkout HEAD file && echo >>file && git add file && test_must_fail git commit -m foo -m bar -F file ' test_expect_success '-m and -C do not mix' ' + git checkout HEAD file && echo >>file && git add file && test_must_fail git commit -C HEAD -m illegal ' From 010c7dbcbe2e55c1dcc85f23fbe55be6d7e931f0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 30 Mar 2012 11:30:59 -0700 Subject: [PATCH 18/32] commit: do not trigger bogus "has templated message edited" check When "-t template" and "-F msg" options are both given (or worse yet, there is "commit.template" configuration but a message is given in some other way), the documentation says that template is ignored. However, the "has the user edited the message?" check still used the contents of the template file as the basis of the emptyness check. Signed-off-by: Junio C Hamano --- builtin/commit.c | 2 ++ t/t7501-commit.sh | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/builtin/commit.c b/builtin/commit.c index eba1377eb..7141766ba 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1049,6 +1049,8 @@ static int parse_and_validate_options(int argc, const char *argv[], die(_("Only one of -c/-C/-F/--fixup can be used.")); if (message.len && f > 0) die((_("Option -m cannot be combined with -c/-C/-F/--fixup."))); + if (f || message.len) + template_file = NULL; if (edit_message) use_message = edit_message; if (amend && !use_message && !fixup_message) diff --git a/t/t7501-commit.sh b/t/t7501-commit.sh index 45446b186..e59cc4e86 100755 --- a/t/t7501-commit.sh +++ b/t/t7501-commit.sh @@ -81,7 +81,13 @@ test_expect_success 'empty commit message' ' test_must_fail git commit -F msg -a ' +test_expect_success 'template "emptyness" check does not kick in with -F' ' + git checkout HEAD file && echo >>file && git add file && + git commit -t file -F file +' + test_expect_success 'setup: commit message from file' ' + git checkout HEAD file && echo >>file && git add file && echo this is the commit message, coming from a file >msg && git commit -F msg -a ' From b2eda9bdfbacdd3bb52da2ebcb468415cf153940 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 30 Mar 2012 12:14:33 -0700 Subject: [PATCH 19/32] commit: rephrase the error when user did not touch templated log message When the user exited editor without editing the commit log template given by "git commit -t