Skip to content

Commit

Permalink
Merge branch 'jc/utf8'
Browse files Browse the repository at this point in the history
* jc/utf8:
  t3900: test conversion to non UTF-8 as well
  Rename t3900 test vector file
  UTF-8: introduce i18n.logoutputencoding.
  Teach log family --encoding
  i18n.logToUTF8: convert commit log message to UTF-8
  Move encoding conversion routine out of mailinfo to utf8.c

Conflicts:

	commit.c
  • Loading branch information
Junio C Hamano committed Dec 29, 2006
2 parents 013672b + 7255ff0 commit eff7375
Show file tree
Hide file tree
Showing 18 changed files with 308 additions and 43 deletions.
4 changes: 4 additions & 0 deletions Documentation/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,10 @@ i18n.commitEncoding::
browser (and possibly at other places in the future or in other
porcelains). See e.g. gitlink:git-mailinfo[1]. Defaults to 'utf-8'.

i18n.logOutputEncoding::
Character encoding the commit messages are converted to when
running `git-log` and friends.

log.showroot::
If true, the initial commit will be shown as a big creation event.
This is equivalent to a diff against an empty tree.
Expand Down
13 changes: 11 additions & 2 deletions builtin-commit-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
char comment[1000];
char *buffer;
unsigned int size;
int encoding_is_utf8;

setup_ident();
git_config(git_default_config);
Expand All @@ -117,6 +118,10 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)
parents++;
}

/* Not having i18n.commitencoding is the same as having utf-8 */
encoding_is_utf8 = (!git_commit_encoding ||
!strcmp(git_commit_encoding, "utf-8"));

init_buffer(&buffer, &size);
add_buffer(&buffer, &size, "tree %s\n", sha1_to_hex(tree_sha1));

Expand All @@ -130,15 +135,19 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix)

/* Person/date information */
add_buffer(&buffer, &size, "author %s\n", git_author_info(1));
add_buffer(&buffer, &size, "committer %s\n\n", git_committer_info(1));
add_buffer(&buffer, &size, "committer %s\n", git_committer_info(1));
if (!encoding_is_utf8)
add_buffer(&buffer, &size,
"encoding %s\n", git_commit_encoding);
add_buffer(&buffer, &size, "\n");

/* And add the comment */
while (fgets(comment, sizeof(comment), stdin) != NULL)
add_buffer(&buffer, &size, "%s", comment);

/* And check the encoding */
buffer[size] = '\0';
if (!strcmp(git_commit_encoding, "utf-8") && !is_utf8(buffer))
if (encoding_is_utf8 && !is_utf8(buffer))
fprintf(stderr, commit_utf8_warn);

if (!write_sha1_file(buffer, size, commit_type, commit_sha1)) {
Expand Down
16 changes: 14 additions & 2 deletions builtin-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,27 @@ void add_head(struct rev_info *revs);
static void cmd_log_init(int argc, const char **argv, const char *prefix,
struct rev_info *rev)
{
int i;

rev->abbrev = DEFAULT_ABBREV;
rev->commit_format = CMIT_FMT_DEFAULT;
rev->verbose_header = 1;
rev->show_root_diff = default_show_root;
argc = setup_revisions(argc, argv, rev, "HEAD");
if (rev->diffopt.pickaxe || rev->diffopt.filter)
rev->always_show_header = 0;
if (argc > 1)
die("unrecognized argument: %s", argv[1]);
for (i = 1; i < argc; i++) {
const char *arg = argv[i];
if (!strncmp(arg, "--encoding=", 11)) {
arg += 11;
if (strcmp(arg, "none"))
git_log_output_encoding = strdup(arg);
else
git_log_output_encoding = "";
}
else
die("unrecognized argument: %s", arg);
}
}

static int cmd_log_walk(struct rev_info *rev)
Expand Down
42 changes: 11 additions & 31 deletions builtin-mailinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
*/
#include "cache.h"
#include "builtin.h"
#include "utf8.h"

static FILE *cmitmsg, *patchfile, *fin, *fout;

Expand Down Expand Up @@ -510,40 +511,18 @@ static int decode_b_segment(char *in, char *ot, char *ep)

static void convert_to_utf8(char *line, char *charset)
{
#ifndef NO_ICONV
char *in, *out;
size_t insize, outsize, nrc;
char outbuf[4096]; /* cheat */
static char latin_one[] = "latin1";
char *input_charset = *charset ? charset : latin_one;
iconv_t conv = iconv_open(metainfo_charset, input_charset);

if (conv == (iconv_t) -1) {
static int warned_latin1_once = 0;
if (input_charset != latin_one) {
fprintf(stderr, "cannot convert from %s to %s\n",
input_charset, metainfo_charset);
*charset = 0;
}
else if (!warned_latin1_once) {
warned_latin1_once = 1;
fprintf(stderr, "tried to convert from %s to %s, "
"but your iconv does not work with it.\n",
input_charset, metainfo_charset);
}
char *out = reencode_string(line, metainfo_charset, input_charset);

if (!out) {
fprintf(stderr, "cannot convert from %s to %s\n",
input_charset, metainfo_charset);
*charset = 0;
return;
}
in = line;
insize = strlen(in);
out = outbuf;
outsize = sizeof(outbuf);
nrc = iconv(conv, &in, &insize, &out, &outsize);
iconv_close(conv);
if (nrc == (size_t) -1)
return;
*out = 0;
strcpy(line, outbuf);
#endif
strcpy(line, out);
free(out);
}

static int decode_header_bq(char *it)
Expand Down Expand Up @@ -827,7 +806,8 @@ int cmd_mailinfo(int argc, const char **argv, const char *prefix)
if (!strcmp(argv[1], "-k"))
keep_subject = 1;
else if (!strcmp(argv[1], "-u"))
metainfo_charset = git_commit_encoding;
metainfo_charset = (git_commit_encoding
? git_commit_encoding : "utf-8");
else if (!strncmp(argv[1], "--encoding=", 11))
metainfo_charset = argv[1] + 11;
else
Expand Down
4 changes: 2 additions & 2 deletions cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ extern int check_repository_format_version(const char *var, const char *value);
extern char git_default_email[MAX_GITNAME];
extern char git_default_name[MAX_GITNAME];

#define MAX_ENCODING_LENGTH 64
extern char git_commit_encoding[MAX_ENCODING_LENGTH];
extern char *git_commit_encoding;
extern char *git_log_output_encoding;

extern int copy_fd(int ifd, int ofd);
extern void write_or_die(int fd, const void *buf, size_t count);
Expand Down
66 changes: 62 additions & 4 deletions commit.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "tag.h"
#include "commit.h"
#include "pkt-line.h"
#include "utf8.h"

int save_commit_buffer = 1;

Expand Down Expand Up @@ -597,17 +598,72 @@ static int add_merge_info(enum cmit_fmt fmt, char *buf, const struct commit *com
return offset;
}

unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit,
unsigned long len, char *buf, unsigned long space,
/*
 * Look up a header field in the header section of a raw commit buffer.
 *
 * Scans commit->buffer line by line for a line of the form "<key> <value>"
 * and returns a newly allocated, NUL-terminated copy of <value>.  The
 * caller owns the returned string and must free() it.
 *
 * Returns NULL when the blank line that terminates the header section is
 * reached, or when the buffer ends, without the key having been found.
 */
static char *get_header(const struct commit *commit, const char *key)
{
	size_t key_len = strlen(key);
	const char *line = commit->buffer;

	/*
	 * Stop at the end of the buffer as well as at the blank line: a
	 * buffer without a blank line or a trailing newline must not
	 * make us hand a NULL pointer to strchr().
	 */
	while (line && *line != '\0') {
		const char *eol = strchr(line, '\n');
		const char *next;

		if (eol == line)
			return NULL;	/* blank line: end of the headers */
		if (eol) {
			next = eol + 1;
		} else {
			/* last line is not newline-terminated */
			eol = line + strlen(line);
			next = NULL;
		}
		if (!strncmp(line, key, key_len) && line[key_len] == ' ') {
			/* the value runs from just past "<key> " up to eol */
			const char *value = line + key_len + 1;
			size_t value_len = (size_t)(eol - value);
			char *ret = xmalloc(value_len + 1);

			memcpy(ret, value, value_len);
			ret[value_len] = '\0';
			return ret;
		}
		line = next;
	}
	return NULL;
}

static char *logmsg_reencode(const struct commit *commit)
{
char *encoding;
char *out;
char *output_encoding = (git_log_output_encoding
? git_log_output_encoding
: git_commit_encoding);

if (!output_encoding)
return NULL;
encoding = get_header(commit, "encoding");
if (!encoding || !strcmp(encoding, output_encoding)) {
free(encoding);
return NULL;
}
out = reencode_string(commit->buffer, output_encoding, encoding);
free(encoding);
if (!out)
return NULL;
return out;
}

unsigned long pretty_print_commit(enum cmit_fmt fmt,
const struct commit *commit,
unsigned long len,
char *buf, unsigned long space,
int abbrev, const char *subject,
const char *after_subject, int relative_date)
const char *after_subject,
int relative_date)
{
int hdr = 1, body = 0;
unsigned long offset = 0;
int indent = 4;
int parents_shown = 0;
const char *msg = commit->buffer;
int plain_non_ascii = 0;
char *reencoded = logmsg_reencode(commit);

if (reencoded)
msg = reencoded;

if (fmt == CMIT_FMT_ONELINE || fmt == CMIT_FMT_EMAIL)
indent = 0;
Expand All @@ -624,7 +680,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
for (in_body = i = 0; (ch = msg[i]) && i < len; i++) {
if (!in_body) {
/* author could be non 7-bit ASCII but
* the log may so; skip over the
* the log may be so; skip over the
* header part first.
*/
if (ch == '\n' &&
Expand Down Expand Up @@ -755,6 +811,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt, const struct commit *commit
if (fmt == CMIT_FMT_EMAIL && !body)
buf[offset++] = '\n';
buf[offset] = '\0';

free(reencoded);
return offset;
}

Expand Down
8 changes: 7 additions & 1 deletion config.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,16 @@ int git_default_config(const char *var, const char *value)
}

if (!strcmp(var, "i18n.commitencoding")) {
strlcpy(git_commit_encoding, value, sizeof(git_commit_encoding));
git_commit_encoding = strdup(value);
return 0;
}

if (!strcmp(var, "i18n.logoutputencoding")) {
git_log_output_encoding = strdup(value);
return 0;
}


if (!strcmp(var, "pager.color") || !strcmp(var, "color.pager")) {
pager_use_color = git_config_bool(var,value);
return 0;
Expand Down
1 change: 1 addition & 0 deletions contrib/completion/git-completion.bash
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,7 @@ _git_repo_config ()
core.compression
core.legacyHeaders
i18n.commitEncoding
i18n.logOutputEncoding
diff.color
color.diff
diff.renameLimit
Expand Down
3 changes: 2 additions & 1 deletion environment.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ int prefer_symlink_refs;
int log_all_ref_updates;
int warn_ambiguous_refs = 1;
int repository_format_version;
char git_commit_encoding[MAX_ENCODING_LENGTH] = "utf-8";
char *git_commit_encoding;
char *git_log_output_encoding;
int shared_repository = PERM_UMASK;
const char *apply_default_whitespace;
int zlib_compression_level = Z_DEFAULT_COMPRESSION;
Expand Down
1 change: 1 addition & 0 deletions revision.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ struct rev_info {
const char *ref_message_id;
const char *add_signoff;
const char *extra_headers;
const char *log_reencode;

/* Filter by commit log message */
struct grep_opt *grep_filter;
Expand Down
Loading

0 comments on commit eff7375

Please sign in to comment.