From a58f3c01f7e6aecb486aadece040a6293eb43059 Mon Sep 17 00:00:00 2001
From: Jakub Narebski <jnareb@gmail.com>
Date: Sat, 26 May 2007 00:37:38 +0200
Subject: [PATCH 1/9] Documentation: Clean up links in GIT Glossary

Ensure that the same link is not repeated in a single glossary entry,
and that there is no self-link, i.e. a link to the current entry.

Add links to other definitions in git glossary.

Remove inappropriate (nonsense) links, or change link to link to
correct definition (to correct term).

Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/glossary.txt | 58 +++++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

diff --git a/Documentation/glossary.txt b/Documentation/glossary.txt
index 489c3e9d5..ba5261883 100644
--- a/Documentation/glossary.txt
+++ b/Documentation/glossary.txt
@@ -10,7 +10,7 @@ GIT Glossary
 	A bare repository is normally an appropriately
 	named <<def_directory,directory>> with a `.git` suffix that does not
 	have a locally checked-out copy of any of the files under
-	<<def_revision,revision>> control. That is, all of the `git`
+	revision control. That is, all of the `git`
 	administrative and control files that would normally be present in the
 	hidden `.git` sub-directory are directly present in the
 	`repository.git` directory instead,
@@ -37,7 +37,7 @@ GIT Glossary
 [[def_chain]]chain::
 	A list of objects, where each <<def_object,object>> in the list contains
 	a reference to its successor (for example, the successor of a
-	<<def_commit,commit>> could be one of its parents).
+	<<def_commit,commit>> could be one of its <<def_parent,parents>>).
 
 [[def_changeset]]changeset::
 	BitKeeper/cvsps speak for "<<def_commit,commit>>". Since git does not
@@ -77,10 +77,10 @@ to point at the new commit.
 
 [[def_commit_object]]commit object::
 	An <<def_object,object>> which contains the information about a
-	particular <<def_revision,revision>>, such as parents, committer,
+	particular <<def_revision,revision>>, such as <<def_parent,parents>>, committer,
 	author, date and the <<def_tree_object,tree object>> which corresponds
 	to the top <<def_directory,directory>> of the stored
-	<<def_revision,revision>>.
+	revision.
 
 [[def_core_git]]core git::
 	Fundamental data structures and utilities of git. Exposes only limited
@@ -101,19 +101,19 @@ to point at the new commit.
 
 [[def_detached_HEAD]]detached HEAD::
 	Normally the <<def_HEAD,HEAD>> stores the name of a
-	<<def_branch,branch>>.  However, git also allows you to check
-	out an arbitrary commit that isn't necessarily the tip of any
+	<<def_branch,branch>>.  However, git also allows you to <<def_checkout,check out>>
+	an arbitrary <<def_commit,commit>> that isn't necessarily the tip of any
 	particular branch.  In this case HEAD is said to be "detached".
 
 [[def_dircache]]dircache::
-	You are *waaaaay* behind.
+	You are *waaaaay* behind. See <<def_index,index>>.
 
 [[def_directory]]directory::
 	The list you get with "ls" :-)
 
 [[def_dirty]]dirty::
 	A <<def_working_tree,working tree>> is said to be "dirty" if
-	it contains modifications which have not been committed to the current
+	it contains modifications which have not been <<def_commit,committed>> to the current
 	<<def_branch,branch>>.
 
 [[def_ent]]ent::
@@ -149,7 +149,7 @@ to point at the new commit.
 [[def_grafts]]grafts::
 	Grafts enables two otherwise different lines of development to be joined
 	together by recording fake ancestry information for commits. This way
-	you can make git pretend the set of parents a <<def_commit,commit>> has
+	you can make git pretend the set of <<def_parent,parents>> a <<def_commit,commit>> has
 	is different from what was recorded when the commit was
 	created. Configured via the `.git/info/grafts` file.
 
@@ -157,13 +157,13 @@ to point at the new commit.
 	In git's context, synonym to <<def_object_name,object name>>.
 
 [[def_head]]head::
-	A named reference to the <<def_commit,commit>> at the tip of a
+	A <<def_ref,named reference>> to the <<def_commit,commit>> at the tip of a
 	<<def_branch,branch>>.  Heads are stored in
 	`$GIT_DIR/refs/heads/`, except when using packed refs. (See
 	gitlink:git-pack-refs[1].)
 
 [[def_HEAD]]HEAD::
-	The current branch.  In more detail: Your <<def_working_tree,
+	The current <<def_branch,branch>>.  In more detail: Your <<def_working_tree,
 	working tree>> is normally derived from the state of the tree
 	referred to by HEAD.  HEAD is a reference to one of the
 	<<def_head,heads>> in your repository, except when using a
@@ -179,15 +179,15 @@ to point at the new commit.
 	checking. Typically, the hooks allow for a command to be pre-verified
 	and potentially aborted, and allow for a post-notification after the
 	operation is done. The hook scripts are found in the
-	`$GIT_DIR/hooks/` <<def_directory,directory>>, and are enabled by simply
+	`$GIT_DIR/hooks/` directory, and are enabled by simply
 	making them executable.
 
 [[def_index]]index::
 	A collection of files with stat information, whose contents are stored
-	as objects. The index is a stored version of your working
-	<<def_tree,tree>>. Truth be told, it can also contain a second, and even
-	a third version of a <<def_working_tree,working tree>>, which are used
-	when merging.
+	as objects. The index is a stored version of your
+	<<def_working_tree,working tree>>. Truth be told, it can also contain a second, and even
+	a third version of a working tree, which are used
+	when <<def_merge,merging>>.
 
 [[def_index_entry]]index entry::
 	The information regarding a particular file, stored in the
@@ -249,16 +249,16 @@ This commit is referred to as a "merge commit", or sometimes just a
 	describing the type of an <<def_object,object>>.
 
 [[def_octopus]]octopus::
-	To <<def_merge,merge>> more than two branches. Also denotes an
+	To <<def_merge,merge>> more than two <<def_branch,branches>>. Also denotes an
 	intelligent predator.
 
 [[def_origin]]origin::
 	The default upstream <<def_repository,repository>>. Most projects have
 	at least one upstream project which they track. By default
 	'origin' is used for that purpose. New upstream updates
-	will be fetched into remote tracking branches named
+	will be fetched into remote <<def_tracking_branch,tracking branches>> named
 	origin/name-of-upstream-branch, which you can see using
-	"git <<def_branch,branch>> -r".
+	"`git branch -r`".
 
 [[def_pack]]pack::
 	A set of objects which have been compressed into one file (to save space
@@ -327,7 +327,7 @@ This commit is referred to as a "merge commit", or sometimes just a
 	`$GIT_DIR/refs/`.
 
 [[def_refspec]]refspec::
-	A <<def_refspec,refspec>> is used by <<def_fetch,fetch>> and
+	A "refspec" is used by <<def_fetch,fetch>> and
 	<<def_push,push>> to describe the mapping between remote
 	<<def_ref,ref>> and local ref. They are combined with a colon in
 	the format <src>:<dst>, preceded by an optional plus sign, +.
@@ -340,11 +340,12 @@ This commit is referred to as a "merge commit", or sometimes just a
 	gitlink:git-push[1]
 
 [[def_repository]]repository::
-	A collection of refs together with an
+	A collection of <<def_ref,refs>> together with an
 	<<def_object_database,object database>> containing all objects
 	which are <<def_reachable,reachable>> from the refs, possibly
-	accompanied by meta data from one or more porcelains. A
-	repository can share an object database with other repositories.
+	accompanied by meta data from one or more <<def_porcelain,porcelains>>. A
+	repository can share an object database with other repositories
+	via <<def_alternate_object_database,alternates mechanism>>.
 
 [[def_resolve]]resolve::
 	The action of fixing up manually what a failed automatic
@@ -366,8 +367,8 @@ This commit is referred to as a "merge commit", or sometimes just a
 	Synonym for <<def_object_name,object name>>.
 
 [[def_shallow_repository]]shallow repository::
-	A shallow repository has an incomplete
-	history some of whose commits have parents cauterized away (in other
+	A shallow <<def_repository,repository>> has an incomplete
+	history some of whose <<def_commit,commits>> have <<def_parent,parents>> cauterized away (in other
 	words, git is told to pretend that these commits do not have the
 	parents, even though they are recorded in the <<def_commit_object,commit
 	object>>). This is sometimes useful when you are interested only in the
@@ -385,7 +386,7 @@ This commit is referred to as a "merge commit", or sometimes just a
 	command.
 
 [[def_tag]]tag::
-	A <<def_ref,ref>> pointing to a tag or
+	A <<def_ref,ref>> pointing to a <<def_tag_object,tag>> or
 	<<def_commit_object,commit object>>. In contrast to a <<def_head,head>>,
 	a tag is not changed by a <<def_commit,commit>>. Tags (not
 	<<def_tag_object,tag objects>>) are stored in `$GIT_DIR/refs/tags/`. A
@@ -398,8 +399,7 @@ This commit is referred to as a "merge commit", or sometimes just a
 	An <<def_object,object>> containing a <<def_ref,ref>> pointing to
 	another object, which can contain a message just like a
 	<<def_commit_object,commit object>>. It can also contain a (PGP)
-	signature, in which case it is called a "signed <<def_tag_object,tag
-	object>>".
+	signature, in which case it is called a "signed tag object".
 
 [[def_topic_branch]]topic branch::
 	A regular git <<def_branch,branch>> that is used by a developer to
@@ -418,7 +418,7 @@ This commit is referred to as a "merge commit", or sometimes just a
 
 [[def_tree]]tree::
 	Either a <<def_working_tree,working tree>>, or a <<def_tree_object,tree
-	object>> together with the dependent blob and tree objects
+	object>> together with the dependent <<def_blob_object,blob>> and tree objects
 	(i.e. a stored representation of a working tree).
 
 [[def_tree_object]]tree object::

From 5adf317b31729707fad4967c1aef6cdba43d0dd3 Mon Sep 17 00:00:00 2001
From: Jakub Narebski <jnareb@gmail.com>
Date: Sat, 26 May 2007 00:37:40 +0200
Subject: [PATCH 2/9] Replace the last 'dircache's by 'index'

Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/git-ls-files.txt | 2 +-
 diff.c                         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 79e0b7b71..b7c8ab5f7 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -124,7 +124,7 @@ which case it outputs:
 detailed information on unmerged paths.
 
 For an unmerged path, instead of recording a single mode/SHA1 pair,
-the dircache records up to three such pairs; one from tree O in stage
+the index records up to three such pairs; one from tree O in stage
 1, A in stage 2, and B in stage 3.  This information can be used by
 the user (or the porcelain) to see what should eventually be recorded at the
 path. (see git-read-tree for more information on state)
diff --git a/diff.c b/diff.c
index 3c1555371..487168be4 100644
--- a/diff.c
+++ b/diff.c
@@ -1258,7 +1258,7 @@ void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1,
 }
 
 /*
- * Given a name and sha1 pair, if the dircache tells us the file in
+ * Given a name and sha1 pair, if the index tells us the file in
  * the work tree has that object contents, return true, so that
  * prepare_temp_file() does not have to inflate and extract.
  */

From c1bab2889eb71bf537497fc77a2fdb6a74bc92e6 Mon Sep 17 00:00:00 2001
From: Jakub Narebski <jnareb@gmail.com>
Date: Sat, 26 May 2007 00:37:39 +0200
Subject: [PATCH 3/9] Documentation: Add definition of "evil merge" to GIT
 Glossary

Signed-off-by: Jakub Narebski <jnareb@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 Documentation/glossary.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/glossary.txt b/Documentation/glossary.txt
index ba5261883..e903abfeb 100644
--- a/Documentation/glossary.txt
+++ b/Documentation/glossary.txt
@@ -121,6 +121,10 @@ to point at the new commit.
 	`http://en.wikipedia.org/wiki/Ent_(Middle-earth)` for an in-depth
 	explanation. Avoid this term, not to confuse people.
 
+[[def_evil_merge]]evil merge::
+	An evil merge is a <<def_merge,merge>> that introduces changes that
+	do not appear in any <<def_parent,parent>>.
+
 [[def_fast_forward]]fast forward::
 	A fast-forward is a special type of <<def_merge,merge>> where you have a
 	<<def_revision,revision>> and you are "merging" another

From 4b7cc26a74b01ceab14a32ef66704557b26d5622 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 25 May 2007 23:42:36 -0400
Subject: [PATCH 4/9] git-am: use printf instead of echo on user-supplied
 strings

Under some implementations of echo (such as that provided by
dash), backslash escapes are recognized without any other
options. This means that echo-ing user-supplied strings may
cause any backslash sequences in them to be converted. Using
printf resolves the ambiguity.

This bug can be seen when using git-am to apply a patch
whose subject contains the character sequence "\n"; the
characters are converted to a literal newline. Noticed by
Szekeres Istvan.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-am.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/git-am.sh b/git-am.sh
index c9f66e278..543efd0ad 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -331,7 +331,7 @@ do
 		ADD_SIGNOFF=
 	    fi
 	    {
-		echo "$SUBJECT"
+		printf '%s\n' "$SUBJECT"
 		if test -s "$dotest/msg-clean"
 		then
 			echo
@@ -394,7 +394,7 @@ do
 	fi
 
 	echo
-	echo "Applying '$SUBJECT'"
+	printf 'Applying %s\n' "$SUBJECT"
 	echo
 
 	case "$resolved" in

From 293623edbc9488adbc71c2e0de1100b52cb193ac Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Fri, 25 May 2007 22:00:54 -0700
Subject: [PATCH 5/9] git-commit: use printf '%s\n' instead of echo on
 user-supplied strings

This fixes the same issue git-am had, which was fixed by Jeff
King in the previous commit.  Cleverly enough, this commit's log
message is a good test case at the same time.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-commit.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/git-commit.sh b/git-commit.sh
index 292cf967e..a1884fed2 100755
--- a/git-commit.sh
+++ b/git-commit.sh
@@ -376,12 +376,12 @@ t,)
 			rm -f "$TMP_INDEX"
 		fi || exit
 
-		echo "$commit_only" |
+		printf '%s\n' "$commit_only" |
 		GIT_INDEX_FILE="$TMP_INDEX" \
 		git-update-index --add --remove --stdin &&
 
 		save_index &&
-		echo "$commit_only" |
+		printf '%s\n' "$commit_only" |
 		(
 			GIT_INDEX_FILE="$NEXT_INDEX"
 			export GIT_INDEX_FILE
@@ -432,7 +432,7 @@ fi
 
 if test "$log_message" != ''
 then
-	echo "$log_message"
+	printf '%s\n' "$log_message"
 elif test "$logfile" != ""
 then
 	if test "$logfile" = -
@@ -475,7 +475,7 @@ if test -f "$GIT_DIR/MERGE_HEAD" && test -z "$no_edit"; then
 	echo "#"
 	echo "# It looks like you may be committing a MERGE."
 	echo "# If this is not correct, please remove the file"
-	echo "#	$GIT_DIR/MERGE_HEAD"
+	printf '%s\n' "#	$GIT_DIR/MERGE_HEAD"
 	echo "# and try again"
 	echo "#"
 fi >>"$GIT_DIR"/COMMIT_EDITMSG

From 816366e23dfe366b938b427eac8ea1c8345ea339 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <junkio@cox.net>
Date: Sat, 26 May 2007 00:26:20 -0700
Subject: [PATCH 6/9] Add tests for the last two fixes.

This updates t4014 to check the two fixes for git-am and git-commit.
The breakage was observed with an "echo" that does backslash
interpolation by default, without being asked to via the -e option.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 t/t4014-format-patch.sh | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh
index 4795872a7..df969bb69 100755
--- a/t/t4014-format-patch.sh
+++ b/t/t4014-format-patch.sh
@@ -16,16 +16,16 @@ test_expect_success setup '
 
 	for i in 1 2 5 6 A B C 7 8 9 10; do echo "$i"; done >file &&
 	git update-index file &&
-	git commit -m "Side change #1" &&
+	git commit -m "Side changes #1" &&
 
 	for i in D E F; do echo "$i"; done >>file &&
 	git update-index file &&
-	git commit -m "Side change #2" &&
+	git commit -m "Side changes #2" &&
 	git tag C2 &&
 
 	for i in 5 6 1 2 3 A 4 B C 7 8 9 10 D E F; do echo "$i"; done >file &&
 	git update-index file &&
-	git commit -m "Side change #3" &&
+	git commit -m "Side changes #3 with \\n backslash-n in it." &&
 
 	git checkout master &&
 	git diff-tree -p C2 | git apply --index &&
@@ -66,4 +66,23 @@ test_expect_success "format-patch --ignore-if-in-upstream result applies" '
 	test $cnt = 2
 '
 
+test_expect_success 'commit did not screw up the log message' '
+
+	git cat-file commit side | grep "^Side .* with .* backslash-n"
+
+'
+
+test_expect_success 'format-patch did not screw up the log message' '
+
+	grep "^Subject: .*Side changes #3 with .* backslash-n" patch0 &&
+	grep "^Subject: .*Side changes #3 with .* backslash-n" patch1
+
+'
+
+test_expect_success 'replay did not screw up the log message' '
+
+	git cat-file commit rebuild-1 | grep "^Side .* with .* backslash-n"
+
+'
+
 test_done

From a23bfaed7da90ccdc75cbfd0099741080381f60a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Sat, 26 May 2007 00:33:03 -0700
Subject: [PATCH 7/9] More echo "$user_message" fixes.

Here are fixes to more uses of 'echo "$msg"' where $msg could contain
a backslashed sequence.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-am.sh    | 4 ++--
 git-merge.sh | 6 +++---
 git-tag.sh   | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/git-am.sh b/git-am.sh
index 543efd0ad..8b5712968 100755
--- a/git-am.sh
+++ b/git-am.sh
@@ -18,7 +18,7 @@ stop_here () {
 
 stop_here_user_resolve () {
     if [ -n "$resolvemsg" ]; then
-	    echo "$resolvemsg"
+	    printf '%s\n' "$resolvemsg"
 	    stop_here $1
     fi
     cmdline=$(basename $0)
@@ -146,7 +146,7 @@ do
 	git_apply_opt="$git_apply_opt $1"; shift ;;
 
 	--resolvemsg=*)
-	resolvemsg=$(echo "$1" | sed -e "s/^--resolvemsg=//"); shift ;;
+	resolvemsg=${1#--resolvemsg=}; shift ;;
 
 	--)
 	shift; break ;;
diff --git a/git-merge.sh b/git-merge.sh
index fa4589173..bf1fd4f06 100755
--- a/git-merge.sh
+++ b/git-merge.sh
@@ -328,7 +328,7 @@ f,*)
 		then
 			echo "Wonderful."
 			result_commit=$(
-				echo "$merge_msg" |
+				printf '%s\n' "$merge_msg" |
 				git-commit-tree $result_tree -p HEAD -p "$1"
 			) || exit
 			finish "$result_commit" "In-index merge"
@@ -433,7 +433,7 @@ done
 if test '' != "$result_tree"
 then
     parents=$(git-show-branch --independent "$head" "$@" | sed -e 's/^/-p /')
-    result_commit=$(echo "$merge_msg" | git-commit-tree $result_tree $parents) || exit
+    result_commit=$(printf '%s\n' "$merge_msg" | git-commit-tree $result_tree $parents) || exit
     finish "$result_commit" "Merge made by $wt_strategy."
     dropsave
     exit 0
@@ -472,7 +472,7 @@ else
 	do
 		echo $remote
 	done >"$GIT_DIR/MERGE_HEAD"
-	echo "$merge_msg" >"$GIT_DIR/MERGE_MSG"
+	printf '%s\n' "$merge_msg" >"$GIT_DIR/MERGE_MSG"
 fi
 
 if test "$merge_was_ok" = t
diff --git a/git-tag.sh b/git-tag.sh
index 4a0a7b660..6f0b7a721 100755
--- a/git-tag.sh
+++ b/git-tag.sh
@@ -126,7 +126,7 @@ if [ "$annotate" ]; then
           echo "#" ) > "$GIT_DIR"/TAG_EDITMSG
         ${VISUAL:-${EDITOR:-vi}} "$GIT_DIR"/TAG_EDITMSG || exit
     else
-        echo "$message" >"$GIT_DIR"/TAG_EDITMSG
+        printf '%s\n' "$message" >"$GIT_DIR"/TAG_EDITMSG
     fi
 
     grep -v '^#' <"$GIT_DIR"/TAG_EDITMSG |

From c23290d528c208a25641f0fc278bac9bb9838265 Mon Sep 17 00:00:00 2001
From: Andy Parkins <andyparkins@gmail.com>
Date: Fri, 25 May 2007 11:50:08 +0100
Subject: [PATCH 8/9] Fix mishandling of $Id$ expanded in the repository copy
 in convert.c

If the repository contained an expanded ident keyword (i.e. $Id:XXXX$),
then the wrong bytes were discarded, and the Id keyword was not
expanded.  The fault was in convert.c:ident_to_worktree().

Previously, when a "$Id:" was found in the repository version,
ident_to_worktree() would search for the next "$" after this, and
discarded everything it found until then.  That was done with the loop:

    do {
        ch = *cp++;
        if (ch == '$')
            break;
        rem--;
    } while (rem);

The above loop left cp pointing one character _after_ the final "$"
(because of ch = *cp++).  This was different from the non-expanded case,
where cp is left pointing at the "$", and was different from the comment
which stated "discard up to but not including the closing $".  This
patch fixes that by making the loop:

    do {
        ch = *cp;
        if (ch == '$')
            break;
        cp++;
        rem--;
    } while (rem);

That is, cp is tested _then_ incremented.

This loop exits if it finds a "$" or if it runs out of bytes in the
source.  After this loop, if there was no closing "$" the expansion is
skipped, and the outer loop is allowed to continue leaving this
non-keyword as it was.  However, when the "$" is found, size is
corrected, before running the expansion:

    size -= (cp - src);

This is wrong; size is going to be corrected anyway after the expansion,
so there is no need to do it here.  This patch removes that redundant
correction.

To help find this bug, I heavily commented the routine; those comments
are included here as a bonus.

Signed-off-by: Andy Parkins <andyparkins@gmail.com>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 convert.c | 39 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/convert.c b/convert.c
index 4b26b1a9b..21908b103 100644
--- a/convert.c
+++ b/convert.c
@@ -509,36 +509,71 @@ static char *ident_to_worktree(const char *path, const char *src, unsigned long
 
 	for (dst = buf; size; size--) {
 		const char *cp;
+		/* Fetch next source character, move the pointer on */
 		char ch = *src++;
+		/* Copy the current character to the destination */
 		*dst++ = ch;
+		/* If the current character is not "$" or there are less than
+		 * three remaining bytes or the two bytes following this one are
+		 * not "Id", then simply read the next character */
 		if ((ch != '$') || (size < 3) || memcmp("Id", src, 2))
 			continue;
+		/*
+		 * Here when
+		 *  - There are more than 2 bytes remaining
+		 *  - The current three bytes are "$Id"
+		 * with
+		 *  - ch == "$"
+		 *  - src[0] == "I"
+		 */
 
+		/*
+		 * It's possible that an expanded Id has crept its way into the
+		 * repository, we cope with that by stripping the expansion out
+		 */
 		if (src[2] == ':') {
+			/* Expanded keywords have "$Id:" at the front */
+
 			/* discard up to but not including the closing $ */
 			unsigned long rem = size - 3;
+			/* Point at first byte after the ":" */
 			cp = src + 3;
+			/*
+			 * Throw away characters until either
+			 *  - we reach a "$"
+			 *  - we run out of bytes (rem == 0)
+			 */
 			do {
-				ch = *cp++;
+				ch = *cp;
 				if (ch == '$')
 					break;
+				cp++;
 				rem--;
 			} while (rem);
+			/* If the above finished because it ran out of characters, then
+			 * this is an incomplete keyword, so don't run the expansion */
 			if (!rem)
 				continue;
-			size -= (cp - src);
 		} else if (src[2] == '$')
 			cp = src + 2;
 		else
+			/* Anything other than "$Id:XXX$" or $Id$ and we skip the
+			 * expansion */
 			continue;
 
+		/* cp is now pointing at the last $ of the keyword */
+
 		memcpy(dst, "Id: ", 4);
 		dst += 4;
 		memcpy(dst, sha1_to_hex(sha1), 40);
 		dst += 40;
 		*dst++ = ' ';
+
+		/* Adjust for the characters we've discarded */
 		size -= (cp - src);
 		src = cp;
+
+		/* Copy the final "$" */
 		*dst++ = *src++;
 		size--;
 	}

From 20b3d206acbbb042c7ad5f42d36ff8d036a538c5 Mon Sep 17 00:00:00 2001
From: James Y Knight <foom@fuhm.net>
Date: Thu, 24 May 2007 00:37:06 -0400
Subject: [PATCH 9/9] Fix git-svn to handle svn not reporting the md5sum of a
 file, and test.

Acked-by: Eric Wong <normalperson@yhbt.net>
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
 git-svn.perl                    |  2 +-
 t/t9112-git-svn-md5less-file.sh | 45 +++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100755 t/t9112-git-svn-md5less-file.sh

diff --git a/git-svn.perl b/git-svn.perl
index eda9969f5..fa46236ae 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -2477,7 +2477,7 @@ sub close_file {
 		$md5->addfile($fh);
 		my $got = $md5->hexdigest;
 		die "Checksum mismatch: $path\n",
-		    "expected: $exp\n    got: $got\n" if ($got ne $exp);
+		    "expected: $exp\n    got: $got\n" if (defined $exp && $got ne $exp);
 		sysseek($fh, 0, 0) or croak $!;
 		if ($fb->{mode_b} == 120000) {
 			sysread($fh, my $buf, 5) == 5 or croak $!;
diff --git a/t/t9112-git-svn-md5less-file.sh b/t/t9112-git-svn-md5less-file.sh
new file mode 100755
index 000000000..08313bb54
--- /dev/null
+++ b/t/t9112-git-svn-md5less-file.sh
@@ -0,0 +1,45 @@
+test_description='test that git handles an svn repository with missing md5sums'
+
+. ./lib-git-svn.sh
+
+# Loading a node from a svn dumpfile without a Text-Content-Length
+# field causes svn to neglect to store or report an md5sum.  (it will
+# calculate one if you had put Text-Content-Length: 0).  This showed
+# up in a repository created with cvs2svn.
+
+cat > dumpfile.svn <<EOF
+SVN-fs-dump-format-version: 1
+
+Revision-number: 1
+Prop-content-length: 98
+Content-length: 98
+
+K 7
+svn:log
+V 0
+
+K 10
+svn:author
+V 4
+test
+K 8
+svn:date
+V 27
+2007-05-06T12:37:01.153339Z
+PROPS-END
+
+Node-path: md5less-file
+Node-kind: file
+Node-action: add
+Prop-content-length: 10
+Content-length: 10
+
+PROPS-END
+
+EOF
+
+test_expect_success 'load svn dumpfile' "svnadmin load $rawsvnrepo < dumpfile.svn"
+
+test_expect_success 'initialize git-svn' "git-svn init $svnrepo"
+test_expect_success 'fetch revisions from svn' 'git-svn fetch'
+test_done