-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
utf8: make it easier to auto-update git_wcwidth()
The function git_wcwidth() returns, for a given Unicode code point, its width on the display: -1 for control characters, 0 for combining or other non-visible code points, 1 for e.g. ASCII, and 2 for double-width code points. This table was originally extracted for one Unicode version, probably 3.2. We use two tables these days, one for zero-width and another for double-width. Make it easier to update these tables to a later version of Unicode by factoring out the tables from utf8.c into unicode_width.h, and add the script update_unicode.sh to update the tables based on the latest Unicode specification files. Thanks to Peter Krefting <peter@softwolves.pp.se> and Kevin Bracey <kevin@bracey.fi> for helping with their Unicode knowledge. Signed-off-by: Torsten Bögershausen <tboegi@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
- Loading branch information
Torsten Bögershausen
authored and
Junio C Hamano
committed
May 12, 2014
1 parent
0846034
commit 9c94389
Showing
5 changed files
with
329 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -225,6 +225,7 @@ | |
/config.mak.autogen | ||
/config.mak.append | ||
/configure | ||
/unicode | ||
/tags | ||
/TAGS | ||
/cscope* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,288 @@ | ||
/*
 * Code point ranges that take up no column on the display (combining marks,
 * format controls and other non-visible code points), used by git_wcwidth().
 * Each entry is an inclusive { first, last } pair, and the entries are
 * listed in ascending order.
 *
 * Generated by update_unicode.sh from the Unicode general categories
 * Me, Mn and Cf, plus U+1160..U+11FF, minus U+00AD.  Re-run the script
 * rather than editing the ranges by hand.
 */
static const struct interval zero_width[] = { | ||
{ 0x0300, 0x036F }, | ||
{ 0x0483, 0x0489 }, | ||
{ 0x0591, 0x05BD }, | ||
{ 0x05BF, 0x05BF }, | ||
{ 0x05C1, 0x05C2 }, | ||
{ 0x05C4, 0x05C5 }, | ||
{ 0x05C7, 0x05C7 }, | ||
{ 0x0600, 0x0604 }, | ||
{ 0x0610, 0x061A }, | ||
{ 0x061C, 0x061C }, | ||
{ 0x064B, 0x065F }, | ||
{ 0x0670, 0x0670 }, | ||
{ 0x06D6, 0x06DD }, | ||
{ 0x06DF, 0x06E4 }, | ||
{ 0x06E7, 0x06E8 }, | ||
{ 0x06EA, 0x06ED }, | ||
{ 0x070F, 0x070F }, | ||
{ 0x0711, 0x0711 }, | ||
{ 0x0730, 0x074A }, | ||
{ 0x07A6, 0x07B0 }, | ||
{ 0x07EB, 0x07F3 }, | ||
{ 0x0816, 0x0819 }, | ||
{ 0x081B, 0x0823 }, | ||
{ 0x0825, 0x0827 }, | ||
{ 0x0829, 0x082D }, | ||
{ 0x0859, 0x085B }, | ||
{ 0x08E4, 0x08FE }, | ||
{ 0x0900, 0x0902 }, | ||
{ 0x093A, 0x093A }, | ||
{ 0x093C, 0x093C }, | ||
{ 0x0941, 0x0948 }, | ||
{ 0x094D, 0x094D }, | ||
{ 0x0951, 0x0957 }, | ||
{ 0x0962, 0x0963 }, | ||
{ 0x0981, 0x0981 }, | ||
{ 0x09BC, 0x09BC }, | ||
{ 0x09C1, 0x09C4 }, | ||
{ 0x09CD, 0x09CD }, | ||
{ 0x09E2, 0x09E3 }, | ||
{ 0x0A01, 0x0A02 }, | ||
{ 0x0A3C, 0x0A3C }, | ||
{ 0x0A41, 0x0A42 }, | ||
{ 0x0A47, 0x0A48 }, | ||
{ 0x0A4B, 0x0A4D }, | ||
{ 0x0A51, 0x0A51 }, | ||
{ 0x0A70, 0x0A71 }, | ||
{ 0x0A75, 0x0A75 }, | ||
{ 0x0A81, 0x0A82 }, | ||
{ 0x0ABC, 0x0ABC }, | ||
{ 0x0AC1, 0x0AC5 }, | ||
{ 0x0AC7, 0x0AC8 }, | ||
{ 0x0ACD, 0x0ACD }, | ||
{ 0x0AE2, 0x0AE3 }, | ||
{ 0x0B01, 0x0B01 }, | ||
{ 0x0B3C, 0x0B3C }, | ||
{ 0x0B3F, 0x0B3F }, | ||
{ 0x0B41, 0x0B44 }, | ||
{ 0x0B4D, 0x0B4D }, | ||
{ 0x0B56, 0x0B56 }, | ||
{ 0x0B62, 0x0B63 }, | ||
{ 0x0B82, 0x0B82 }, | ||
{ 0x0BC0, 0x0BC0 }, | ||
{ 0x0BCD, 0x0BCD }, | ||
{ 0x0C3E, 0x0C40 }, | ||
{ 0x0C46, 0x0C48 }, | ||
{ 0x0C4A, 0x0C4D }, | ||
{ 0x0C55, 0x0C56 }, | ||
{ 0x0C62, 0x0C63 }, | ||
{ 0x0CBC, 0x0CBC }, | ||
{ 0x0CBF, 0x0CBF }, | ||
{ 0x0CC6, 0x0CC6 }, | ||
{ 0x0CCC, 0x0CCD }, | ||
{ 0x0CE2, 0x0CE3 }, | ||
{ 0x0D41, 0x0D44 }, | ||
{ 0x0D4D, 0x0D4D }, | ||
{ 0x0D62, 0x0D63 }, | ||
{ 0x0DCA, 0x0DCA }, | ||
{ 0x0DD2, 0x0DD4 }, | ||
{ 0x0DD6, 0x0DD6 }, | ||
{ 0x0E31, 0x0E31 }, | ||
{ 0x0E34, 0x0E3A }, | ||
{ 0x0E47, 0x0E4E }, | ||
{ 0x0EB1, 0x0EB1 }, | ||
{ 0x0EB4, 0x0EB9 }, | ||
{ 0x0EBB, 0x0EBC }, | ||
{ 0x0EC8, 0x0ECD }, | ||
{ 0x0F18, 0x0F19 }, | ||
{ 0x0F35, 0x0F35 }, | ||
{ 0x0F37, 0x0F37 }, | ||
{ 0x0F39, 0x0F39 }, | ||
{ 0x0F71, 0x0F7E }, | ||
{ 0x0F80, 0x0F84 }, | ||
{ 0x0F86, 0x0F87 }, | ||
{ 0x0F8D, 0x0F97 }, | ||
{ 0x0F99, 0x0FBC }, | ||
{ 0x0FC6, 0x0FC6 }, | ||
{ 0x102D, 0x1030 }, | ||
{ 0x1032, 0x1037 }, | ||
{ 0x1039, 0x103A }, | ||
{ 0x103D, 0x103E }, | ||
{ 0x1058, 0x1059 }, | ||
{ 0x105E, 0x1060 }, | ||
{ 0x1071, 0x1074 }, | ||
{ 0x1082, 0x1082 }, | ||
{ 0x1085, 0x1086 }, | ||
{ 0x108D, 0x108D }, | ||
{ 0x109D, 0x109D }, | ||
{ 0x1160, 0x11FF }, | ||
{ 0x135D, 0x135F }, | ||
{ 0x1712, 0x1714 }, | ||
{ 0x1732, 0x1734 }, | ||
{ 0x1752, 0x1753 }, | ||
{ 0x1772, 0x1773 }, | ||
{ 0x17B4, 0x17B5 }, | ||
{ 0x17B7, 0x17BD }, | ||
{ 0x17C6, 0x17C6 }, | ||
{ 0x17C9, 0x17D3 }, | ||
{ 0x17DD, 0x17DD }, | ||
{ 0x180B, 0x180E }, | ||
{ 0x18A9, 0x18A9 }, | ||
{ 0x1920, 0x1922 }, | ||
{ 0x1927, 0x1928 }, | ||
{ 0x1932, 0x1932 }, | ||
{ 0x1939, 0x193B }, | ||
{ 0x1A17, 0x1A18 }, | ||
{ 0x1A1B, 0x1A1B }, | ||
{ 0x1A56, 0x1A56 }, | ||
{ 0x1A58, 0x1A5E }, | ||
{ 0x1A60, 0x1A60 }, | ||
{ 0x1A62, 0x1A62 }, | ||
{ 0x1A65, 0x1A6C }, | ||
{ 0x1A73, 0x1A7C }, | ||
{ 0x1A7F, 0x1A7F }, | ||
{ 0x1B00, 0x1B03 }, | ||
{ 0x1B34, 0x1B34 }, | ||
{ 0x1B36, 0x1B3A }, | ||
{ 0x1B3C, 0x1B3C }, | ||
{ 0x1B42, 0x1B42 }, | ||
{ 0x1B6B, 0x1B73 }, | ||
{ 0x1B80, 0x1B81 }, | ||
{ 0x1BA2, 0x1BA5 }, | ||
{ 0x1BA8, 0x1BA9 }, | ||
{ 0x1BAB, 0x1BAB }, | ||
{ 0x1BE6, 0x1BE6 }, | ||
{ 0x1BE8, 0x1BE9 }, | ||
{ 0x1BED, 0x1BED }, | ||
{ 0x1BEF, 0x1BF1 }, | ||
{ 0x1C2C, 0x1C33 }, | ||
{ 0x1C36, 0x1C37 }, | ||
{ 0x1CD0, 0x1CD2 }, | ||
{ 0x1CD4, 0x1CE0 }, | ||
{ 0x1CE2, 0x1CE8 }, | ||
{ 0x1CED, 0x1CED }, | ||
{ 0x1CF4, 0x1CF4 }, | ||
{ 0x1DC0, 0x1DE6 }, | ||
{ 0x1DFC, 0x1DFF }, | ||
{ 0x200B, 0x200F }, | ||
{ 0x202A, 0x202E }, | ||
{ 0x2060, 0x2064 }, | ||
{ 0x2066, 0x206F }, | ||
{ 0x20D0, 0x20F0 }, | ||
{ 0x2CEF, 0x2CF1 }, | ||
{ 0x2D7F, 0x2D7F }, | ||
{ 0x2DE0, 0x2DFF }, | ||
{ 0x302A, 0x302D }, | ||
{ 0x3099, 0x309A }, | ||
{ 0xA66F, 0xA672 }, | ||
{ 0xA674, 0xA67D }, | ||
{ 0xA69F, 0xA69F }, | ||
{ 0xA6F0, 0xA6F1 }, | ||
{ 0xA802, 0xA802 }, | ||
{ 0xA806, 0xA806 }, | ||
{ 0xA80B, 0xA80B }, | ||
{ 0xA825, 0xA826 }, | ||
{ 0xA8C4, 0xA8C4 }, | ||
{ 0xA8E0, 0xA8F1 }, | ||
{ 0xA926, 0xA92D }, | ||
{ 0xA947, 0xA951 }, | ||
{ 0xA980, 0xA982 }, | ||
{ 0xA9B3, 0xA9B3 }, | ||
{ 0xA9B6, 0xA9B9 }, | ||
{ 0xA9BC, 0xA9BC }, | ||
{ 0xAA29, 0xAA2E }, | ||
{ 0xAA31, 0xAA32 }, | ||
{ 0xAA35, 0xAA36 }, | ||
{ 0xAA43, 0xAA43 }, | ||
{ 0xAA4C, 0xAA4C }, | ||
{ 0xAAB0, 0xAAB0 }, | ||
{ 0xAAB2, 0xAAB4 }, | ||
{ 0xAAB7, 0xAAB8 }, | ||
{ 0xAABE, 0xAABF }, | ||
{ 0xAAC1, 0xAAC1 }, | ||
{ 0xAAEC, 0xAAED }, | ||
{ 0xAAF6, 0xAAF6 }, | ||
{ 0xABE5, 0xABE5 }, | ||
{ 0xABE8, 0xABE8 }, | ||
{ 0xABED, 0xABED }, | ||
{ 0xFB1E, 0xFB1E }, | ||
{ 0xFE00, 0xFE0F }, | ||
{ 0xFE20, 0xFE26 }, | ||
{ 0xFEFF, 0xFEFF }, | ||
{ 0xFFF9, 0xFFFB }, | ||
{ 0x101FD, 0x101FD }, | ||
{ 0x10A01, 0x10A03 }, | ||
{ 0x10A05, 0x10A06 }, | ||
{ 0x10A0C, 0x10A0F }, | ||
{ 0x10A38, 0x10A3A }, | ||
{ 0x10A3F, 0x10A3F }, | ||
{ 0x11001, 0x11001 }, | ||
{ 0x11038, 0x11046 }, | ||
{ 0x11080, 0x11081 }, | ||
{ 0x110B3, 0x110B6 }, | ||
{ 0x110B9, 0x110BA }, | ||
{ 0x110BD, 0x110BD }, | ||
{ 0x11100, 0x11102 }, | ||
{ 0x11127, 0x1112B }, | ||
{ 0x1112D, 0x11134 }, | ||
{ 0x11180, 0x11181 }, | ||
{ 0x111B6, 0x111BE }, | ||
{ 0x116AB, 0x116AB }, | ||
{ 0x116AD, 0x116AD }, | ||
{ 0x116B0, 0x116B5 }, | ||
{ 0x116B7, 0x116B7 }, | ||
{ 0x16F8F, 0x16F92 }, | ||
{ 0x1D167, 0x1D169 }, | ||
{ 0x1D173, 0x1D182 }, | ||
{ 0x1D185, 0x1D18B }, | ||
{ 0x1D1AA, 0x1D1AD }, | ||
{ 0x1D242, 0x1D244 }, | ||
{ 0xE0001, 0xE0001 }, | ||
{ 0xE0020, 0xE007F }, | ||
{ 0xE0100, 0xE01EF } | ||
}; | ||
/*
 * Code point ranges that take up two columns on the display, used by
 * git_wcwidth().  Each entry is an inclusive { first, last } pair, and the
 * entries are listed in ascending order.
 *
 * Generated by update_unicode.sh from the East Asian Width classes
 * F (Fullwidth) and W (Wide).
 *
 * The generator's per-plane index rows (tagged with a "plane" comment) are
 * deliberately absent: they are offsets into the range list, not code
 * points.  Left in, they would report U+0000..U+0023 as double-width and
 * break the ascending order of the table.
 */
static const struct interval double_width[] = {
{ 0x1100, 0x115F },
{ 0x2329, 0x232A },
{ 0x2E80, 0x2E99 },
{ 0x2E9B, 0x2EF3 },
{ 0x2F00, 0x2FD5 },
{ 0x2FF0, 0x2FFB },
{ 0x3000, 0x303E },
{ 0x3041, 0x3096 },
{ 0x3099, 0x30FF },
{ 0x3105, 0x312D },
{ 0x3131, 0x318E },
{ 0x3190, 0x31BA },
{ 0x31C0, 0x31E3 },
{ 0x31F0, 0x321E },
{ 0x3220, 0x3247 },
{ 0x3250, 0x32FE },
{ 0x3300, 0x4DBF },
{ 0x4E00, 0xA48C },
{ 0xA490, 0xA4C6 },
{ 0xA960, 0xA97C },
{ 0xAC00, 0xD7A3 },
{ 0xF900, 0xFAFF },
{ 0xFE10, 0xFE19 },
{ 0xFE30, 0xFE52 },
{ 0xFE54, 0xFE66 },
{ 0xFE68, 0xFE6B },
{ 0xFF01, 0xFF60 },
{ 0xFFE0, 0xFFE6 },
{ 0x1B000, 0x1B001 },
{ 0x1F200, 0x1F202 },
{ 0x1F210, 0x1F23A },
{ 0x1F240, 0x1F248 },
{ 0x1F250, 0x1F251 },
{ 0x20000, 0x2FFFD },
{ 0x30000, 0x3FFFD }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!/bin/sh
#
# Regenerate unicode_width.h, i.e. the zero_width[] and double_width[]
# interval tables, from the latest Unicode data files.
#
# All downloads and the uniset build live in ./unicode.  Files and checkouts
# already present there are reused, so remove them to force a refresh.
#
#See http://www.unicode.org/reports/tr44/
#
#Me Enclosing_Mark  an enclosing combining mark
#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
#Cf Format          a format control character
#
# Relative to ./unicode, which is the working directory while generating.
UNICODEWIDTH_H=../unicode_width.h
if ! test -d unicode; then
	mkdir unicode
fi &&
( cd unicode &&
	if ! test -f UnicodeData.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
	fi &&
	if ! test -f EastAsianWidth.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
	fi &&
	if ! test -d uniset; then
		git clone https://github.com/depp/uniset.git
	fi &&
	(
		cd uniset &&
		# Configure only when no uniset binary has been built yet.
		if ! test -x uniset; then
			autoreconf -i &&
			./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
		fi &&
		make
	) &&
	# "uniset --32" emits per-plane index rows tagged "/* plane */" ahead of
	# the ranges.  They are not code point ranges, so both tables must have
	# them filtered out with "grep -v plane".
	echo "static const struct interval zero_width[] = {" >"$UNICODEWIDTH_H" &&
	UNICODE_DIR=. ./uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
	grep -v plane >>"$UNICODEWIDTH_H" &&
	echo "};" >>"$UNICODEWIDTH_H" &&
	echo "static const struct interval double_width[] = {" >>"$UNICODEWIDTH_H" &&
	UNICODE_DIR=. ./uniset/uniset --32 eaw:F,W |
	grep -v plane >>"$UNICODEWIDTH_H" &&
	echo "};" >>"$UNICODEWIDTH_H"
)
Oops, something went wrong.