Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
git-mirror
/
git
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Issues
0
Pull requests
0
Actions
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Security
Insights
Files
f9acaea
Documentation
block-sha1
compat
contrib
git-gui
gitk-git
gitweb
perl
ppc
t
templates
xdiff
.gitattributes
.gitignore
.mailmap
COPYING
GIT-VERSION-GEN
INSTALL
Makefile
README
RelNotes
abspath.c
advice.c
advice.h
alias.c
alloc.c
archive-tar.c
archive-zip.c
archive.c
archive.h
attr.c
attr.h
base85.c
bisect.c
bisect.h
blob.c
blob.h
branch.c
branch.h
builtin-add.c
builtin-annotate.c
builtin-apply.c
builtin-archive.c
builtin-bisect--helper.c
builtin-blame.c
builtin-branch.c
builtin-bundle.c
builtin-cat-file.c
builtin-check-attr.c
builtin-check-ref-format.c
builtin-checkout-index.c
builtin-checkout.c
builtin-clean.c
builtin-clone.c
builtin-commit-tree.c
builtin-commit.c
builtin-config.c
builtin-count-objects.c
builtin-describe.c
builtin-diff-files.c
builtin-diff-index.c
builtin-diff-tree.c
builtin-diff.c
builtin-fast-export.c
builtin-fetch--tool.c
builtin-fetch-pack.c
builtin-fetch.c
builtin-fmt-merge-msg.c
builtin-for-each-ref.c
builtin-fsck.c
builtin-gc.c
builtin-grep.c
builtin-help.c
builtin-init-db.c
builtin-log.c
builtin-ls-files.c
builtin-ls-remote.c
builtin-ls-tree.c
builtin-mailinfo.c
builtin-mailsplit.c
builtin-merge-base.c
builtin-merge-file.c
builtin-merge-ours.c
builtin-merge-recursive.c
builtin-merge.c
builtin-mktree.c
builtin-mv.c
builtin-name-rev.c
builtin-pack-objects.c
builtin-pack-refs.c
builtin-prune-packed.c
builtin-prune.c
builtin-push.c
builtin-read-tree.c
builtin-receive-pack.c
builtin-reflog.c
builtin-remote.c
builtin-replace.c
builtin-rerere.c
builtin-reset.c
builtin-rev-list.c
builtin-rev-parse.c
builtin-revert.c
builtin-rm.c
builtin-send-pack.c
builtin-shortlog.c
builtin-show-branch.c
builtin-show-ref.c
builtin-stripspace.c
builtin-symbolic-ref.c
builtin-tag.c
builtin-tar-tree.c
builtin-unpack-objects.c
builtin-update-index.c
builtin-update-ref.c
builtin-update-server-info.c
builtin-upload-archive.c
builtin-verify-pack.c
builtin-verify-tag.c
builtin-write-tree.c
builtin.h
bundle.c
bundle.h
cache-tree.c
cache-tree.h
cache.h
check-builtins.sh
check-racy.c
check_bindir
color.c
color.h
combine-diff.c
command-list.txt
commit.c
commit.h
config.c
config.mak.in
configure.ac
connect.c
convert.c
copy.c
csum-file.c
csum-file.h
ctype.c
daemon.c
date.c
decorate.c
decorate.h
delta.h
diff-delta.c
diff-lib.c
diff-no-index.c
diff.c
diff.h
diffcore-break.c
diffcore-delta.c
diffcore-order.c
diffcore-pickaxe.c
diffcore-rename.c
diffcore.h
dir.c
dir.h
editor.c
entry.c
environment.c
exec_cmd.c
exec_cmd.h
fast-import.c
fetch-pack.h
fixup-builtins
fsck.c
fsck.h
generate-cmdlist.sh
git-add--interactive.perl
git-am.sh
git-archimport.perl
git-bisect.sh
git-compat-util.h
git-cvsexportcommit.perl
git-cvsimport.perl
git-cvsserver.perl
git-difftool--helper.sh
git-difftool.perl
git-filter-branch.sh
git-instaweb.sh
git-lost-found.sh
git-merge-octopus.sh
git-merge-one-file.sh
git-merge-resolve.sh
git-mergetool--lib.sh
git-mergetool.sh
git-parse-remote.sh
git-pull.sh
git-quiltimport.sh
git-rebase--interactive.sh
git-rebase.sh
git-relink.perl
git-repack.sh
git-request-pull.sh
git-send-email.perl
git-sh-setup.sh
git-stash.sh
git-submodule.sh
git-svn.perl
git-web--browse.sh
git.c
git.spec.in
graph.c
graph.h
grep.c
grep.h
hash-object.c
hash.c
hash.h
help.c
help.h
http-fetch.c
http-push.c
http-walker.c
http.c
http.h
ident.c
imap-send.c
index-pack.c
levenshtein.c
levenshtein.h
list-objects.c
list-objects.h
ll-merge.c
ll-merge.h
lockfile.c
log-tree.c
log-tree.h
mailmap.c
mailmap.h
match-trees.c
merge-file.c
merge-index.c
merge-recursive.c
merge-recursive.h
merge-tree.c
mktag.c
name-hash.c
object.c
object.h
pack-check.c
pack-redundant.c
pack-refs.c
pack-refs.h
pack-revindex.c
pack-revindex.h
pack-write.c
pack.h
pager.c
parse-options.c
parse-options.h
patch-delta.c
patch-id.c
patch-ids.c
patch-ids.h
path.c
pkt-line.c
pkt-line.h
preload-index.c
pretty.c
progress.c
progress.h
quote.c
quote.h
reachable.c
reachable.h
read-cache.c
reflog-walk.c
reflog-walk.h
refs.c
refs.h
remote-curl.c
remote.c
remote.h
replace_object.c
rerere.c
rerere.h
revision.c
revision.h
run-command.c
run-command.h
send-pack.h
server-info.c
setup.c
sha1-lookup.c
sha1-lookup.h
sha1_file.c
sha1_name.c
shallow.c
shell.c
shortlog.h
show-index.c
sideband.c
sideband.h
sigchain.c
sigchain.h
strbuf.c
strbuf.h
string-list.c
string-list.h
symlinks.c
tag.c
tag.h
tar.h
test-chmtime.c
test-ctype.c
test-date.c
test-delta.c
test-dump-cache-tree.c
test-genrandom.c
test-match-trees.c
test-parse-options.c
test-path-utils.c
test-sha1.c
test-sha1.sh
test-sigchain.c
thread-utils.c
thread-utils.h
trace.c
transport-helper.c
transport.c
transport.h
tree-diff.c
tree-walk.c
tree-walk.h
tree.c
tree.h
unimplemented.sh
unpack-file.c
unpack-trees.c
unpack-trees.h
upload-pack.c
usage.c
userdiff.c
userdiff.h
utf8.c
utf8.h
var.c
walker.c
walker.h
wrapper.c
write_or_die.c
ws.c
wt-status.c
wt-status.h
xdiff-interface.c
xdiff-interface.h
Breadcrumbs
git
/
utf8.c
Blame
Blame
Latest commit
History
History
410 lines (382 loc) · 11.6 KB
Breadcrumbs
git
/
utf8.c
Top
File metadata and controls
Code
Blame
410 lines (382 loc) · 11.6 KB
Raw
#include "git-compat-util.h" #include "utf8.h" /* This code is originally from http://www.cl.cam.ac.uk/~mgk25/ucs/ */ struct interval { int first; int last; }; /* auxiliary function for binary search in interval table */ static int bisearch(ucs_char_t ucs, const struct interval *table, int max) { int min = 0; int mid; if (ucs < table[0].first || ucs > table[max].last) return 0; while (max >= min) { mid = (min + max) / 2; if (ucs > table[mid].last) min = mid + 1; else if (ucs < table[mid].first) max = mid - 1; else return 1; } return 0; } /* The following two functions define the column width of an ISO 10646 * character as follows: * * - The null character (U+0000) has a column width of 0. * * - Other C0/C1 control characters and DEL will lead to a return * value of -1. * * - Non-spacing and enclosing combining characters (general * category code Mn or Me in the Unicode database) have a * column width of 0. * * - SOFT HYPHEN (U+00AD) has a column width of 1. * * - Other format characters (general category code Cf in the Unicode * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0. * * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF) * have a column width of 0. * * - Spacing characters in the East Asian Wide (W) or East Asian * Full-width (F) category as defined in Unicode Technical * Report #11 have a column width of 2. * * - All remaining characters (including all printable * ISO 8859-1 and WGL4 characters, Unicode control characters, * etc.) have a column width of 1. * * This implementation assumes that ucs_char_t characters are encoded * in ISO 10646. */ static int git_wcwidth(ucs_char_t ch) { /* * Sorted list of non-overlapping intervals of non-spacing characters, * generated by * "uniset +cat=Me +cat=Mn +cat=Cf -00AD +1160-11FF +200B c". */ static const struct interval combining[] = { { 0x0300, 0x0357 }, { 0x035D, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C4 }, { 0x0600, 0x0603 }, { 0x0610, 0x0615 }, { 0x064B, 0x0658 }, { 0x0670, 0x0670 }, { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC }, { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 }, { 0x206A, 0x206F }, { 0x20D0, 0x20EA }, { 0x302A, 0x302F }, { 0x3099, 0x309A }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB }, { 0x1D167, 0x1D169 }, { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F }, { 0xE0100, 0xE01EF } }; /* test for 8-bit control characters */ if (ch == 0) return 0; if (ch < 32 || (ch >= 0x7f && ch < 0xa0)) return -1; /* binary search in table of non-spacing characters */ if (bisearch(ch, combining, sizeof(combining) / sizeof(struct interval) - 1)) return 0; /* * If we arrive here, ch is neither a combining nor a C0/C1 * control character. */ return 1 + (ch >= 0x1100 && /* Hangul Jamo init. consonants */ (ch <= 0x115f || ch == 0x2329 || ch == 0x232a || /* CJK ... Yi */ (ch >= 0x2e80 && ch <= 0xa4cf && ch != 0x303f) || /* Hangul Syllables */ (ch >= 0xac00 && ch <= 0xd7a3) || /* CJK Compatibility Ideographs */ (ch >= 0xf900 && ch <= 0xfaff) || /* CJK Compatibility Forms */ (ch >= 0xfe30 && ch <= 0xfe6f) || /* Fullwidth Forms */ (ch >= 0xff00 && ch <= 0xff60) || (ch >= 0xffe0 && ch <= 0xffe6) || (ch >= 0x20000 && ch <= 0x2fffd) || (ch >= 0x30000 && ch <= 0x3fffd))); } /* * Pick one ucs character starting from the location *start points at, * and return it, while updating the *start pointer to point at the * end of that character. When remainder_p is not NULL, the location * holds the number of bytes remaining in the string that we are allowed * to pick from. Otherwise we are allowed to pick up to the NUL that * would eventually appear in the string. *remainder_p is also reduced * by the number of bytes we have consumed. * * If the string was not a valid UTF-8, *start pointer is set to NULL * and the return value is undefined. */ ucs_char_t pick_one_utf8_char(const char **start, size_t *remainder_p) { unsigned char *s = (unsigned char *)*start; ucs_char_t ch; size_t remainder, incr; /* * A caller that assumes NUL terminated text can choose * not to bother with the remainder length. We will * stop at the first NUL. */ remainder = (remainder_p ? *remainder_p : 999); if (remainder < 1) { goto invalid; } else if (*s < 0x80) { /* 0xxxxxxx */ ch = *s; incr = 1; } else if ((s[0] & 0xe0) == 0xc0) { /* 110XXXXx 10xxxxxx */ if (remainder < 2 || (s[1] & 0xc0) != 0x80 || (s[0] & 0xfe) == 0xc0) goto invalid; ch = ((s[0] & 0x1f) << 6) | (s[1] & 0x3f); incr = 2; } else if ((s[0] & 0xf0) == 0xe0) { /* 1110XXXX 10Xxxxxx 10xxxxxx */ if (remainder < 3 || (s[1] & 0xc0) != 0x80 || (s[2] & 0xc0) != 0x80 || /* overlong? */ (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* surrogate? */ (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* U+FFFE or U+FFFF? */ (s[0] == 0xef && s[1] == 0xbf && (s[2] & 0xfe) == 0xbe)) goto invalid; ch = ((s[0] & 0x0f) << 12) | ((s[1] & 0x3f) << 6) | (s[2] & 0x3f); incr = 3; } else if ((s[0] & 0xf8) == 0xf0) { /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ if (remainder < 4 || (s[1] & 0xc0) != 0x80 || (s[2] & 0xc0) != 0x80 || (s[3] & 0xc0) != 0x80 || /* overlong? */ (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* > U+10FFFF? */ (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) goto invalid; ch = ((s[0] & 0x07) << 18) | ((s[1] & 0x3f) << 12) | ((s[2] & 0x3f) << 6) | (s[3] & 0x3f); incr = 4; } else { invalid: *start = NULL; return 0; } *start += incr; if (remainder_p) *remainder_p = remainder - incr; return ch; } /* * This function returns the number of columns occupied by the character * pointed to by the variable start. The pointer is updated to point at * the next character. When remainder_p is not NULL, it points at the * location that stores the number of remaining bytes we can use to pick * a character (see pick_one_utf8_char() above). */ int utf8_width(const char **start, size_t *remainder_p) { ucs_char_t ch = pick_one_utf8_char(start, remainder_p); if (!*start) return 0; return git_wcwidth(ch); } /* * Returns the total number of columns required by a null-terminated * string, assuming that the string is utf8. Returns strlen() instead * if the string does not look like a valid utf8 string. */ int utf8_strwidth(const char *string) { int width = 0; const char *orig = string; while (1) { if (!string) return strlen(orig); if (!*string) return width; width += utf8_width(&string, NULL); } } int is_utf8(const char *text) { while (*text) { if (*text == '\n' || *text == '\t' || *text == '\r') { text++; continue; } utf8_width(&text, NULL); if (!text) return 0; } return 1; } static void print_spaces(int count) { static const char s[] = " "; while (count >= sizeof(s)) { fwrite(s, sizeof(s) - 1, 1, stdout); count -= sizeof(s) - 1; } fwrite(s, count, 1, stdout); } /* * Wrap the text, if necessary. The variable indent is the indent for the * first line, indent2 is the indent for all other lines. * If indent is negative, assume that already -indent columns have been * consumed (and no extra indent is necessary for the first line). */ int print_wrapped_text(const char *text, int indent, int indent2, int width) { int w = indent, assume_utf8 = is_utf8(text); const char *bol = text, *space = NULL; if (indent < 0) { w = -indent; space = text; } for (;;) { char c = *text; if (!c || isspace(c)) { if (w < width || !space) { const char *start = bol; if (space) start = space; else print_spaces(indent); fwrite(start, text - start, 1, stdout); if (!c) return w; else if (c == '\t') w |= 0x07; space = text; w++; text++; } else { putchar('\n'); text = bol = space + isspace(*space); space = NULL; w = indent = indent2; } continue; } if (assume_utf8) w += utf8_width(&text, NULL); else { w++; text++; } } } int is_encoding_utf8(const char *name) { if (!name) return 1; if (!strcasecmp(name, "utf-8") || !strcasecmp(name, "utf8")) return 1; return 0; } /* * Given a buffer and its encoding, return it re-encoded * with iconv. If the conversion fails, returns NULL. */ #ifndef NO_ICONV #if defined(OLD_ICONV) || (defined(__sun__) && !defined(_XPG6)) typedef const char * iconv_ibp; #else typedef char * iconv_ibp; #endif char *reencode_string(const char *in, const char *out_encoding, const char *in_encoding) { iconv_t conv; size_t insz, outsz, outalloc; char *out, *outpos; iconv_ibp cp; if (!in_encoding) return NULL; conv = iconv_open(out_encoding, in_encoding); if (conv == (iconv_t) -1) return NULL; insz = strlen(in); outsz = insz; outalloc = outsz + 1; /* for terminating NUL */ out = xmalloc(outalloc); outpos = out; cp = (iconv_ibp)in; while (1) { size_t cnt = iconv(conv, &cp, &insz, &outpos, &outsz); if (cnt == -1) { size_t sofar; if (errno != E2BIG) { free(out); iconv_close(conv); return NULL; } /* insz has remaining number of bytes. * since we started outsz the same as insz, * it is likely that insz is not enough for * converting the rest. */ sofar = outpos - out; outalloc = sofar + insz * 2 + 32; out = xrealloc(out, outalloc); outpos = out + sofar; outsz = outalloc - sofar - 1; } else { *outpos = '\0'; break; } } iconv_close(conv); return out; } #endif
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
You can’t perform that action at this time.