From ce80ca566af713c85ce3a1c20b466486058a971a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 18 Oct 2012 03:25:22 -0400 Subject: [PATCH 1/2] git-sh-setup: refactor ident-parsing functions The only ident-parsing function we currently provide is get_author_ident_from_commit. This is not very flexible for two reasons: 1. It takes a commit as an argument, and can't read from commit headers saved on disk. 2. It will only parse authors, not committers. This patch provides a more flexible interface which will parse multiple idents from a commit provide on stdin. We can easily use it as a building block for the current function to retain compatibility. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-sh-setup.sh | 62 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/git-sh-setup.sh b/git-sh-setup.sh index ee0e0bc04..22f0aed6d 100644 --- a/git-sh-setup.sh +++ b/git-sh-setup.sh @@ -191,28 +191,52 @@ require_clean_work_tree () { fi } +# Generate a sed script to parse identities from a commit. +# +# Reads the commit from stdin, which should be in raw format (e.g., from +# cat-file or "--pretty=raw"). +# +# The first argument specifies the ident line to parse (e.g., "author"), and +# the second specifies the environment variable to put it in (e.g., "AUTHOR" +# for "GIT_AUTHOR_*"). Multiple pairs can be given to parse author and +# committer. +pick_ident_script () { + while test $# -gt 0 + do + lid=$1; shift + uid=$1; shift + printf '%s' " + /^$lid /{ + s/'/'\\\\''/g + h + s/^$lid "'\([^<]*\) <[^>]*> .*$/\1/'" + s/.*/GIT_${uid}_NAME='&'/p + + g + s/^$lid "'[^<]* <\([^>]*\)> .*$/\1/'" + s/.*/GIT_${uid}_EMAIL='&'/p + + g + s/^$lid "'[^<]* <[^>]*> \(.*\)$/@\1/'" + s/.*/GIT_${uid}_DATE='&'/p + } + " + done + echo '/^$/q' +} + +# Create a pick-script as above and feed it to sed. Stdout is suitable for +# feeding to eval. +parse_ident_from_commit () { + LANG=C LC_ALL=C sed -ne "$(pick_ident_script "$@")" +} + +# Parse the author from a commit given as an argument. Stdout is suitable for +# feeding to eval to set the usual GIT_* ident variables. get_author_ident_from_commit () { - pick_author_script=' - /^author /{ - s/'\''/'\''\\'\'\''/g - h - s/^author \([^<]*\) <[^>]*> .*$/\1/ - s/.*/GIT_AUTHOR_NAME='\''&'\''/p - - g - s/^author [^<]* <\([^>]*\)> .*$/\1/ - s/.*/GIT_AUTHOR_EMAIL='\''&'\''/p - - g - s/^author [^<]* <[^>]*> \(.*\)$/@\1/ - s/.*/GIT_AUTHOR_DATE='\''&'\''/p - - q - } - ' encoding=$(git config i18n.commitencoding || echo UTF-8) git show -s --pretty=raw --encoding="$encoding" "$1" -- | - LANG=C LC_ALL=C sed -ne "$pick_author_script" + parse_ident_from_commit author AUTHOR } # Clear repo-local GIT_* environment variables. Useful when switching to From 3c730fab2cae1bb52d34620af170a628b3b8c537 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 18 Oct 2012 06:33:02 -0400 Subject: [PATCH 2/2] filter-branch: use git-sh-setup's ident parsing functions This saves us some code, but it also reduces the number of processes we start for each filtered commit. Since we can parse both author and committer in the same sed invocation, we save one process. And since the new interface avoids tr, we save 4 processes. It also avoids using "tr", which has had some odd portability problems reported with from Solaris's xpg6 version. We also tweak one of the tests in t7003 to double-check that we are properly exporting the variables (because test-lib.sh exports GIT_AUTHOR_NAME, it will be automatically exported in subprograms. We override this to make sure that filter-branch handles it properly itself). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-filter-branch.sh | 46 ++++++++++++---------------------------- t/t7003-filter-branch.sh | 5 +++-- 2 files changed, 16 insertions(+), 35 deletions(-) diff --git a/git-filter-branch.sh b/git-filter-branch.sh index 178e45305..53142492a 100755 --- a/git-filter-branch.sh +++ b/git-filter-branch.sh @@ -64,37 +64,19 @@ EOF eval "$functions" -# When piped a commit, output a script to set the ident of either -# "author" or "committer +finish_ident() { + # Ensure non-empty id name. + echo "case \"\$GIT_$1_NAME\" in \"\") GIT_$1_NAME=\"\${GIT_$1_EMAIL%%@*}\" && export GIT_$1_NAME;; esac" + # And make sure everything is exported. + echo "export GIT_$1_NAME" + echo "export GIT_$1_EMAIL" + echo "export GIT_$1_DATE" +} set_ident () { - lid="$(echo "$1" | tr "[A-Z]" "[a-z]")" - uid="$(echo "$1" | tr "[a-z]" "[A-Z]")" - pick_id_script=' - /^'$lid' /{ - s/'\''/'\''\\'\'\''/g - h - s/^'$lid' \([^<]*\) <[^>]*> .*$/\1/ - s/'\''/'\''\'\'\''/g - s/.*/GIT_'$uid'_NAME='\''&'\''; export GIT_'$uid'_NAME/p - - g - s/^'$lid' [^<]* <\([^>]*\)> .*$/\1/ - s/'\''/'\''\'\'\''/g - s/.*/GIT_'$uid'_EMAIL='\''&'\''; export GIT_'$uid'_EMAIL/p - - g - s/^'$lid' [^<]* <[^>]*> \(.*\)$/@\1/ - s/'\''/'\''\'\'\''/g - s/.*/GIT_'$uid'_DATE='\''&'\''; export GIT_'$uid'_DATE/p - - q - } - ' - - LANG=C LC_ALL=C sed -ne "$pick_id_script" - # Ensure non-empty id name. - echo "case \"\$GIT_${uid}_NAME\" in \"\") GIT_${uid}_NAME=\"\${GIT_${uid}_EMAIL%%@*}\" && export GIT_${uid}_NAME;; esac" + parse_ident_from_commit author AUTHOR committer COMMITTER + finish_ident AUTHOR + finish_ident COMMITTER } USAGE="[--env-filter ] [--tree-filter ] @@ -320,10 +302,8 @@ while read commit parents; do git cat-file commit "$commit" >../commit || die "Cannot read commit $commit" - eval "$(set_ident AUTHOR <../commit)" || - die "setting author failed for commit $commit" - eval "$(set_ident COMMITTER <../commit)" || - die "setting committer failed for commit $commit" + eval "$(set_ident <../commit)" || + die "setting author/committer failed for commit $commit" eval "$filter_env" < /dev/null || die "env filter failed: $filter_env" diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh index 4d13e10de..1e7a209ef 100755 --- a/t/t7003-filter-branch.sh +++ b/t/t7003-filter-branch.sh @@ -167,10 +167,11 @@ test_expect_success 'author information is preserved' ' test_tick && GIT_AUTHOR_NAME="B V Uips" git commit -m bvuips && git branch preserved-author && - git filter-branch -f --msg-filter "cat; \ + (sane_unset GIT_AUTHOR_NAME && + git filter-branch -f --msg-filter "cat; \ test \$GIT_COMMIT != $(git rev-parse master) || \ echo Hallo" \ - preserved-author && + preserved-author) && test 1 = $(git rev-list --author="B V Uips" preserved-author | wc -l) '