Skip to content

Commit

Permalink
UTF-8: introduce i18n.logoutputencoding.
Browse files Browse the repository at this point in the history
It is plausible for somebody to want to view the commit log in a
different encoding from i18n.commitencoding -- the project's
policy may be UTF-8 and the user may be using a commit message
hook to run iconv to conform to that policy (and either leave
i18n.commitencoding unset so that it defaults to UTF-8, or have it
explicitly set to UTF-8).  Even then, Latin-1 may be more convenient for
the usual pager and the terminal the user uses.

The new variable i18n.logoutputencoding is used in preference to
i18n.commitencoding to decide what encoding to recode the log
output in when git-log and friends format the commit log message.

Signed-off-by: Junio C Hamano <junkio@cox.net>
  • Loading branch information
Junio C Hamano committed Dec 28, 2006
1 parent 52883fb commit d2c11a3
Show file tree
Hide file tree
Showing 15 changed files with 160 additions and 23 deletions.
4 changes: 4 additions & 0 deletions Documentation/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,10 @@ i18n.commitEncoding::
browser (and possibly at other places in the future or in other
porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'.

i18n.logOutputEncoding::
Character encoding the commit messages are converted to when
running `git-log` and friends. When unset, the value of
i18n.commitEncoding is used instead.

log.showroot::
If true, the initial commit will be shown as a big creation event.
This is equivalent to a diff against an empty tree.
Expand Down
4 changes: 3 additions & 1 deletion builtin-commit-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
parents++;
}

encoding_is_utf8 = !strcmp(git_commit_encoding, "utf-8");
/* Not having i18n.commitencoding is the same as having utf-8 */
encoding_is_utf8 = (!git_commit_encoding ||
!strcmp(git_commit_encoding, "utf-8"));

init_buffer(&buffer, &size);
add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));
Expand Down
7 changes: 2 additions & 5 deletions builtin-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,10 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
const char *arg = argv[i];
if (!strncmp(arg, "--encoding=", 11)) {
arg += 11;
if (MAX_ENCODING_LENGTH <= strlen(arg))
die(" Value of output encoding '%s' too long",
arg);
if (strcmp(arg, "none"))
strcpy(git_commit_encoding, arg);
git_log_output_encoding = strdup(arg);
else
git_commit_encoding[0] = 0;
git_log_output_encoding = "";
}
else
die("unrecognized argument: %s", arg);
Expand Down
3 changes: 2 additions & 1 deletion builtin-mailinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
if (!strcmp(argv[1], "-k"))
keep_subject = 1;
else if (!strcmp(argv[1], "-u"))
metainfo_charset = git_commit_encoding;
metainfo_charset = (git_commit_encoding
? git_commit_encoding : "utf-8");
else if (!strncmp(argv[1], "--encoding=", 11))
metainfo_charset = argv[1] + 11;
else
Expand Down
4 changes: 2 additions & 2 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ extern int check_repository_format_version(const char *var, const char *value);
extern char git_default_email[MAX_GITNAME];
extern char git_default_name[MAX_GITNAME];

#define MAX_ENCODING_LENGTH 64
extern char git_commit_encoding[MAX_ENCODING_LENGTH];
extern char *git_commit_encoding;
extern char *git_log_output_encoding;

extern int copy_fd(int ifd, int ofd);
extern void write_or_die(int fd, const void *buf, size_t count);
Expand Down
27 changes: 15 additions & 12 deletions commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,12 +592,20 @@ static char *get_header(const struct commit *commit, const char *key)

static char *logmsg_reencode(const struct commit *commit)
{
char *encoding = get_header(commit, "encoding");
char *encoding;
char *out;
char *output_encoding = (git_log_output_encoding
? git_log_output_encoding
: git_commit_encoding);

if (!encoding || !strcmp(encoding, git_commit_encoding))
if (!output_encoding)
return NULL;
out = reencode_string(commit->buffer, git_commit_encoding, encoding);
encoding = get_header(commit, "encoding");
if (!encoding || !strcmp(encoding, output_encoding)) {
free(encoding);
return NULL;
}
out = reencode_string(commit->buffer, output_encoding, encoding);
free(encoding);
if (!out)
return NULL;
Expand All @@ -618,15 +626,10 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
int parents_shown = 0;
const char *msg = commit->buffer;
int plain_non_ascii = 0;
char *reencoded = NULL;
char *reencoded = logmsg_reencode(commit);

if (*git_commit_encoding) {
reencoded = logmsg_reencode(commit);
if (reencoded) {
msg = reencoded;
len = strlen(msg);
}
}
if (reencoded)
msg = reencoded;

if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
indent = 0;
Expand All @@ -643,7 +646,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
for (in_body = i = 0; (ch = msg[i]) && i < len; i++) {
if (!in_body) {
/* author could be non 7-bit ASCII but
* the log may so; skip over the
* the log may be so; skip over the
* header part first.
*/
if (ch == '\n' &&
Expand Down
8 changes: 7 additions & 1 deletion config.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,16 @@ int git_default_config(const char *var, const char *value)
}

if (!strcmp(var, "i18n.commitencoding")) {
strlcpy(git_commit_encoding, value, sizeof(git_commit_encoding));
git_commit_encoding = strdup(value);
return 0;
}

if (!strcmp(var, "i18n.logoutputencoding")) {
git_log_output_encoding = strdup(value);
return 0;
}


if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) {
pager_use_color = git_config_bool(var,value);
return 0;
Expand Down
1 change: 1 addition & 0 deletions contrib/completion/git-completion.bash
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,7 @@ _git_repo_config ()
core.compression
core.legacyHeaders
i18n.commitEncoding
i18n.logOutputEncoding
diff.color
color.diff
diff.renameLimit
Expand Down
3 changes: 2 additions & 1 deletion environment.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ int prefer_symlink_refs;
int log_all_ref_updates;
int warn_ambiguous_refs = 1;
int repository_format_version;
char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8";
char *git_commit_encoding;
char *git_log_output_encoding;
int shared_repository = PERM_UMASK;
const char *apply_default_whitespace;
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
Expand Down
104 changes: 104 additions & 0 deletions t/t3900-i18n-commit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/bin/sh
#
# Copyright (c) 2006 Junio C Hamano
#

test_description='commit and log output encodings'

. ./test-lib.sh

# compare_with <commit> <expected-file>
#
# Takes the `git-show -s` output for <commit>, deletes everything up to
# and including the first empty line, applies the s/^ // substitution to
# strip the leading indentation, and drops the final line.  The result is
# written to the file `current` and compared against <expected-file> with
# `diff -u`; the function's status is that of diff when the pipeline
# succeeds.
compare_with () {
git-show -s "$1" | sed -e '1,/^$/d' -e 's/^ //' -e '$d' >current &&
diff -u current "$2"
}

# Build the base commit: an empty file F is added, a tree is written, and
# a commit is created with git-commit-tree using ../t3900/1-UTF-8.txt as
# the log message.  HEAD is pointed at that commit and it is tagged C0.
# This script has not set i18n.commitencoding at this point.
test_expect_success setup '
: >F &&
git-add F &&
T=$(git-write-tree) &&
C=$(git-commit-tree $T <../t3900/1-UTF-8.txt) &&
git-update-ref HEAD $C &&
git-tag C0
'

# The sed expression prints only the value of an "encoding " line found in
# the commit object of C0; the test passes when nothing is printed, i.e.
# when the commit carries no such header.
test_expect_success 'no encoding header for base case' '
E=$(git-cat-file commit C0 | sed -ne "s/^encoding //p") &&
test z = "z$E"
'

# For each encoding name H: set i18n.commitencoding to H, create a branch
# named H starting at C0, change F, and commit with the log message taken
# from ../t3900/$H.txt.
# NOTE(review): $H sits inside the single-quoted test body here, so it is
# presumably expanded when test_expect_success evaluates the body --
# confirm against test-lib.sh.
for H in ISO-8859-1 EUCJP ISO2022JP
do
test_expect_success "$H setup" '
git-repo-config i18n.commitencoding $H &&
git-checkout -b $H C0 &&
echo $H >F &&
git-commit -a -F ../t3900/$H.txt
'
done

# For each branch H created above, the "encoding " header value extracted
# from its tip commit object must be exactly H.  Here '$H' closes and
# reopens the single quotes, so H is substituted while the test body
# string is being built.
for H in ISO-8859-1 EUCJP ISO2022JP
do
test_expect_success "check encoding header for $H" '
E=$(git-cat-file commit '$H' | sed -ne "s/^encoding //p") &&
test "z$E" = "z'$H'"
'
done

# Remove every i18n.commitencoding entry, verify that a subsequent
# --get-all fails and prints nothing, then set the variable explicitly
# to utf-8.
test_expect_success 'repo-config to remove customization' '
git-repo-config --unset-all i18n.commitencoding &&
if Z=$(git-repo-config --get-all i18n.commitencoding)
then
echo Oops, should have failed.
false
else
test z = "z$Z"
fi &&
git-repo-config i18n.commitencoding utf-8
'

# With i18n.commitencoding set to utf-8, the log message of the
# ISO-8859-1 branch must match the UTF-8 fixture 1-UTF-8.txt.
test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' '
compare_with ISO-8859-1 ../t3900/1-UTF-8.txt
'

# Likewise, both Japanese-encoded branches must match 2-UTF-8.txt.
for H in EUCJP ISO2022JP
do
test_expect_success "$H should be shown in UTF-8 now" '
compare_with '$H' ../t3900/2-UTF-8.txt
'
done

# Drop the utf-8 setting again and verify that no i18n.commitencoding
# value remains (--get-all must fail and print nothing).
test_expect_success 'repo-config to add customization' '
git-repo-config --unset-all i18n.commitencoding &&
if Z=$(git-repo-config --get-all i18n.commitencoding)
then
echo Oops, should have failed.
false
else
test z = "z$Z"
fi
'

# For each encoding H: set i18n.commitencoding to H and check that the
# log message of branch H matches the fixture ../t3900/H.txt that the
# commit was created from.
for H in ISO-8859-1 EUCJP ISO2022JP
do
test_expect_success "$H should be shown in itself now" '
git-repo-config i18n.commitencoding '$H' &&
compare_with '$H' ../t3900/'$H'.txt
'
done

# Set i18n.logoutputencoding to utf-8.  i18n.commitencoding is not touched
# here; assuming the tests run in order, it still holds the value written
# by the last iteration of the preceding loop (ISO2022JP).
test_expect_success 'repo-config to tweak customization' '
git-repo-config i18n.logoutputencoding utf-8
'

# The checks below expect UTF-8 output even though i18n.commitencoding is
# not utf-8, i.e. they pass only if i18n.logoutputencoding is the setting
# that decides the output encoding.
test_expect_success 'ISO-8859-1 should be shown in UTF-8 now' '
compare_with ISO-8859-1 ../t3900/1-UTF-8.txt
'

for H in EUCJP ISO2022JP
do
test_expect_success "$H should be shown in UTF-8 now" '
compare_with '$H' ../t3900/2-UTF-8.txt
'
done

# End of the test script.
test_done
3 changes: 3 additions & 0 deletions t/t3900/1-UTF-8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ÄËÑÏÖ

Ábçdèfg
4 changes: 4 additions & 0 deletions t/t3900/2-UTF-8.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
はれひほふ

しているのが、いるので。
濱浜ほれぷりぽれまびぐりろへ。
4 changes: 4 additions & 0 deletions t/t3900/EUCJP.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
�Ϥ�Ҥۤ�

���Ƥ���Τ�������Τǡ�
���ͤۤ�פ�ݤ�ޤӤ�����ء�
3 changes: 3 additions & 0 deletions t/t3900/ISO-8859-1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
�����

�b�d�fg
4 changes: 4 additions & 0 deletions t/t3900/ISO2022JP.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
$B$O$l$R$[$U(B

$B$7$F$$$k$N$,!"$$$k$N$G!#(B
$B_@IM$[$l$W$j$]$l$^$S$0$j$m$X!#(B

0 comments on commit d2c11a3

Please sign in to comment.