Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Resetting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
git-mirror
/
glibc
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Pull requests
0
Actions
Projects
0
Security
Insights
Additional navigation options
Code
Pull requests
Actions
Projects
Security
Insights
Files
dc4bb1c
abilist
aout
argp
assert
bare
bits
catgets
conf
conform
crypt
csu
ctype
debug
dirent
dlfcn
elf
gmon
gnulib
grp
hesiod
hurd
iconv
iconvdata
testdata
8bit-gap.c
8bit-generic.c
ARMSCII-8.irreversible
BIG5.irreversible
BIG5HKSCS.irreversible
CP1255.irreversible
CP1258.irreversible
CP932.irreversible
Depend
EUC-JISX0213.precomposed
EUC-JP-MS.irreversible
EUC-JP.irreversible
EUC-KR.irreversible
EUC-TW.irreversible
IBM1132.irreversible
IBM1133.irreversible
IBM1160.irreversible
IBM1161.irreversible
IBM1163.irreversible
IBM1164.irreversible
IBM856.irreversible
IBM922.irreversible
ISIRI-3342.irreversible
JISX0213.TXT
MISSING
Makefile
SHIFT_JISX0213.irreversible
SHIFT_JISX0213.precomposed
SJIS.irreversible
TCVN5712-1.precomposed
TESTS
TESTS2
TSCII.irreversible
TSCII.precomposed
ansi_x3.110.c
armscii-8.c
asmo_449.c
big5.c
big5hkscs.c
bug-iconv1.c
bug-iconv2.c
bug-iconv3.c
bug-iconv4.c
cns11643.c
cns11643.h
cns11643l1.c
cns11643l1.h
cns11643l2.h
cp10007.c
cp1125.c
cp1250.c
cp1251.c
cp1252.c
cp1253.c
cp1254.c
cp1255.c
cp1256.c
cp1257.c
cp1258.c
cp737.c
cp737.h
cp775.c
cp775.h
cp932.c
csn_369103.c
cwi.c
dec-mcs.c
ebcdic-at-de-a.c
ebcdic-at-de.c
ebcdic-ca-fr.c
ebcdic-dk-no-a.c
ebcdic-dk-no.c
ebcdic-es-a.c
ebcdic-es-s.c
ebcdic-es.c
ebcdic-fi-se-a.c
ebcdic-fi-se.c
ebcdic-fr.c
ebcdic-is-friss.c
ebcdic-it.c
ebcdic-pt.c
ebcdic-uk.c
ebcdic-us.c
ecma-cyrillic.c
euc-cn.c
euc-jisx0213.c
euc-jp-ms.c
euc-jp.c
euc-kr.c
euc-tw.c
extra-module.mk
gap.awk
gaptab.awk
gb18030.c
gb2312.c
gb2312.h
gbbig5.c
gbgbk.c
gbk.c
gconv-modules
gconv.map
gen-8bit-gap-1.sh
gen-8bit-gap.sh
gen-8bit.sh
georgian-academy.c
georgian-ps.c
gost_19768-74.c
greek-ccitt.c
greek7-old.c
greek7.c
hp-roman8.c
ibm037.c
ibm038.c
ibm1004.c
ibm1025.c
ibm1025.h
ibm1026.c
ibm1046.c
ibm1046.h
ibm1047.c
ibm1122.c
ibm1122.h
ibm1124.c
ibm1124.h
ibm1129.c
ibm1129.h
ibm1132.c
ibm1132.h
ibm1133.c
ibm1133.h
ibm1137.c
ibm1137.h
ibm1153.c
ibm1153.h
ibm1154.c
ibm1154.h
ibm1155.c
ibm1155.h
ibm1156.c
ibm1156.h
ibm1157.c
ibm1157.h
ibm1158.c
ibm1158.h
ibm1160.c
ibm1160.h
ibm1161.c
ibm1161.h
ibm1162.c
ibm1162.h
ibm1163.c
ibm1163.h
ibm1164.c
ibm1164.h
ibm256.c
ibm273.c
ibm274.c
ibm275.c
ibm277.c
ibm278.c
ibm280.c
ibm281.c
ibm284.c
ibm285.c
ibm290.c
ibm297.c
ibm420.c
ibm423.c
ibm424.c
ibm437.c
ibm500.c
ibm850.c
ibm851.c
ibm852.c
ibm855.c
ibm856.c
ibm856.h
ibm857.c
ibm860.c
ibm861.c
ibm862.c
ibm863.c
ibm864.c
ibm865.c
ibm866.c
ibm866nav.c
ibm868.c
ibm869.c
ibm870.c
ibm871.c
ibm874.c
ibm875.c
ibm880.c
ibm891.c
ibm903.c
ibm904.c
ibm905.c
ibm918.c
ibm922.c
ibm922.h
ibm930.c
ibm930.h
ibm932.c
ibm932.h
ibm933.c
ibm933.h
ibm935.c
ibm935.h
ibm937.c
ibm937.h
ibm939.c
ibm939.h
ibm943.c
ibm943.h
iec_p27-1.c
inis-8.c
inis-cyrillic.c
inis.c
isiri-3342.c
isiri-3342.h
iso-2022-cn-ext.c
iso-2022-cn.c
iso-2022-jp-3.c
iso-2022-jp.c
iso-2022-kr.c
iso-ir-165.c
iso-ir-165.h
iso-ir-197.c
iso-ir-209.c
iso646.c
iso8859-1.c
iso8859-10.c
iso8859-11.c
iso8859-13.c
iso8859-14.c
iso8859-15.c
iso8859-16.c
iso8859-2.c
iso8859-3.c
iso8859-4.c
iso8859-5.c
iso8859-6.c
iso8859-7.c
iso8859-8.c
iso8859-9.c
iso_10367-box.c
iso_2033.c
iso_5427-ext.c
iso_5427.c
iso_5428.c
iso_6937-2.c
iso_6937.c
jis0201.c
jis0201.h
jis0208.c
jis0208.h
jis0212.c
jis0212.h
jisx0213.c
jisx0213.h
johab.c
koi-8.c
koi8-r.c
koi8-t.c
koi8-u.c
ksc5601.c
ksc5601.h
latin-greek-1.c
latin-greek.c
mac-is.c
mac-sami.c
mac-uk.c
macintosh.c
nats-dano.c
nats-sefi.c
pt154.c
rk1048.c
run-iconv-test.sh
sami-ws2.c
shift_jisx0213.c
sjis.c
t.61.c
tcvn5712-1.c
tis-620.c
tscii.c
tst-e2big.c
tst-iconv4.c
tst-loading.c
tst-table-charmap.sh
tst-table-from.c
tst-table-to.c
tst-table.sh
tst-tables.sh
uhc.c
unicode.c
utf-16.c
utf-32.c
utf-7.c
viscii.c
include
inet
intl
io
libidn
libio
locale
localedata
login
mach
malloc
manual
math
misc
nis
nptl
nptl_db
nscd
nss
po
posix
pwd
resolv
resource
rt
scripts
setjmp
shadow
signal
socket
soft-fp
stdio-common
stdlib
streams
string
sunrpc
sysdeps
sysvipc
termios
time
timezone
wcsmbs
wctype
.cvsignore
BUGS
CANCEL-FCT-WAIVE
CANCEL-FILE-WAIVE
CONFORMANCE
COPYING
COPYING.LIB
ChangeLog
ChangeLog.1
ChangeLog.10
ChangeLog.11
ChangeLog.12
ChangeLog.13
ChangeLog.14
ChangeLog.15
ChangeLog.2
ChangeLog.3
ChangeLog.4
ChangeLog.5
ChangeLog.6
ChangeLog.7
ChangeLog.8
ChangeLog.9
FAQ
FAQ.in
INSTALL
INTERFACE
LICENSES
MakeTAGS
Makeconfig
Makefile
Makefile.in
Makerules
NAMESPACE
NEWS
NOTES
PROJECTS
README
README.libm
README.template
Rules
Versions.def
WUR-REPORT
abi-tags
aclocal.m4
config-name.in
config.h.in
config.make.in
configure
configure.in
cppflags-iterator.mk
extra-lib.mk
extra-modules.mk
o-iterator.mk
shlib-versions
test-skeleton.c
tls.make.c
version.h
Breadcrumbs
glibc
/
iconvdata
/
utf-16.c
Blame
Blame
Latest commit
History
History
362 lines (334 loc) · 10.7 KB
Breadcrumbs
glibc
/
iconvdata
/
utf-16.c
Top
File metadata and controls
Code
Blame
362 lines (334 loc) · 10.7 KB
Raw
/* Conversion module for UTF-16. Copyright (C) 1999, 2000-2002, 2003, 2005 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper <drepper@cygnus.com>, 1999. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ #include <byteswap.h> #include <dlfcn.h> #include <gconv.h> #include <stddef.h> #include <stdint.h> #include <stdlib.h> #include <string.h> /* This is the Byte Order Mark character (BOM). */ #define BOM 0xfeff /* And in the other byte order. */ #define BOM_OE 0xfffe /* Definitions used in the body of the `gconv' function. */ #define FROM_LOOP from_utf16_loop #define TO_LOOP to_utf16_loop #define DEFINE_INIT 0 #define DEFINE_FINI 0 #define MIN_NEEDED_FROM 2 #define MAX_NEEDED_FROM 4 #define MIN_NEEDED_TO 4 #define FROM_DIRECTION (dir == from_utf16) #define PREPARE_LOOP \ enum direction dir = ((struct utf16_data *) step->__data)->dir; \ enum variant var = ((struct utf16_data *) step->__data)->var; \ if (__builtin_expect (data->__invocation_counter == 0, 0) && var == UTF_16) \ { \ if (FROM_DIRECTION) \ { \ /* We have to find out which byte order the file is encoded in. */ \ if (inptr + 2 > inend) \ return (inptr == inend \ ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); \ \ if (get16u (inptr) == BOM) \ /* Simply ignore the BOM character. 
*/ \ *inptrp = inptr += 2; \ else if (get16u (inptr) == BOM_OE) \ { \ ((struct utf16_data *) step->__data)->swap = 1; \ *inptrp = inptr += 2; \ } \ } \ else if (!FROM_DIRECTION && !data->__internal_use) \ { \ /* Emit the Byte Order Mark. */ \ if (__builtin_expect (outbuf + 2 > outend, 0)) \ return __GCONV_FULL_OUTPUT; \ \ put16u (outbuf, BOM); \ outbuf += 2; \ } \ } \ int swap = ((struct utf16_data *) step->__data)->swap; #define EXTRA_LOOP_ARGS , swap /* Direction of the transformation. */ enum direction { illegal_dir, to_utf16, from_utf16 }; enum variant { illegal_var, UTF_16, UTF_16LE, UTF_16BE }; struct utf16_data { enum direction dir; enum variant var; int swap; }; extern int gconv_init (struct __gconv_step *step); int gconv_init (struct __gconv_step *step) { /* Determine which direction. */ struct utf16_data *new_data; enum direction dir = illegal_dir; enum variant var = illegal_var; int result; if (__strcasecmp (step->__from_name, "UTF-16//") == 0) { dir = from_utf16; var = UTF_16; } else if (__strcasecmp (step->__to_name, "UTF-16//") == 0) { dir = to_utf16; var = UTF_16; } else if (__strcasecmp (step->__from_name, "UTF-16BE//") == 0) { dir = from_utf16; var = UTF_16BE; } else if (__strcasecmp (step->__to_name, "UTF-16BE//") == 0) { dir = to_utf16; var = UTF_16BE; } else if (__strcasecmp (step->__from_name, "UTF-16LE//") == 0) { dir = from_utf16; var = UTF_16LE; } else if (__strcasecmp (step->__to_name, "UTF-16LE//") == 0) { dir = to_utf16; var = UTF_16LE; } result = __GCONV_NOCONV; if (__builtin_expect (dir, to_utf16) != illegal_dir) { new_data = (struct utf16_data *) malloc (sizeof (struct utf16_data)); result = __GCONV_NOMEM; if (new_data != NULL) { new_data->dir = dir; new_data->var = var; new_data->swap = ((var == UTF_16LE && BYTE_ORDER == BIG_ENDIAN) || (var == UTF_16BE && BYTE_ORDER == LITTLE_ENDIAN)); step->__data = new_data; if (dir == from_utf16) { step->__min_needed_from = MIN_NEEDED_FROM; step->__max_needed_from = MAX_NEEDED_FROM; 
step->__min_needed_to = MIN_NEEDED_TO; step->__max_needed_to = MIN_NEEDED_TO; } else { step->__min_needed_from = MIN_NEEDED_TO; step->__max_needed_from = MIN_NEEDED_TO; step->__min_needed_to = MIN_NEEDED_FROM; step->__max_needed_to = MAX_NEEDED_FROM; } step->__stateful = 0; result = __GCONV_OK; } } return result; } extern void gconv_end (struct __gconv_step *data); void gconv_end (struct __gconv_step *data) { free (data->__data); } /* Convert from the internal (UCS4-like) format to UTF-16. */ #define MIN_NEEDED_INPUT MIN_NEEDED_TO #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM #define LOOPFCT TO_LOOP #define BODY \ { \ uint32_t c = get32 (inptr); \ \ if (__builtin_expect (c >= 0xd800 && c < 0xe000, 0)) \ { \ /* Surrogate characters in UCS-4 input are not valid. \ We must catch this. If we let surrogates pass through, \ attackers could make a security hole exploit by \ synthesizing any desired plane 1-16 character. */ \ result = __GCONV_ILLEGAL_INPUT; \ if (! ignore_errors_p ()) \ break; \ inptr += 4; \ ++*irreversible; \ continue; \ } \ \ if (swap) \ { \ if (__builtin_expect (c >= 0x10000, 0)) \ { \ if (__builtin_expect (c >= 0x110000, 0)) \ { \ STANDARD_TO_LOOP_ERR_HANDLER (4); \ } \ \ /* Generate a surrogate character. */ \ if (__builtin_expect (outptr + 4 > outend, 0)) \ { \ /* Overflow in the output buffer. */ \ result = __GCONV_FULL_OUTPUT; \ break; \ } \ \ put16 (outptr, bswap_16 (0xd7c0 + (c >> 10))); \ outptr += 2; \ put16 (outptr, bswap_16 (0xdc00 + (c & 0x3ff))); \ } \ else \ put16 (outptr, bswap_16 (c)); \ } \ else \ { \ if (__builtin_expect (c >= 0x10000, 0)) \ { \ if (__builtin_expect (c >= 0x110000, 0)) \ { \ STANDARD_TO_LOOP_ERR_HANDLER (4); \ } \ \ /* Generate a surrogate character. */ \ if (__builtin_expect (outptr + 4 > outend, 0)) \ { \ /* Overflow in the output buffer. 
*/ \ result = __GCONV_FULL_OUTPUT; \ break; \ } \ \ put16 (outptr, 0xd7c0 + (c >> 10)); \ outptr += 2; \ put16 (outptr, 0xdc00 + (c & 0x3ff)); \ } \ else \ put16 (outptr, c); \ } \ outptr += 2; \ inptr += 4; \ } #define LOOP_NEED_FLAGS #define EXTRA_LOOP_DECLS \ , int swap #include <iconv/loop.c> /* Convert from UTF-16 to the internal (UCS4-like) format. */ #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MAX_NEEDED_INPUT MAX_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO #define LOOPFCT FROM_LOOP #define BODY \ { \ uint16_t u1 = get16 (inptr); \ \ if (swap) \ { \ u1 = bswap_16 (u1); \ \ if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \ { \ /* No surrogate. */ \ put32 (outptr, u1); \ inptr += 2; \ } \ else \ { \ uint16_t u2; \ \ /* It's a surrogate character. At least the first word says \ it is. */ \ if (__builtin_expect (inptr + 4 > inend, 0)) \ { \ /* We don't have enough input for another complete input \ character. */ \ result = __GCONV_INCOMPLETE_INPUT; \ break; \ } \ \ inptr += 2; \ u2 = bswap_16 (get16 (inptr)); \ if (__builtin_expect (u2 < 0xdc00, 0) \ || __builtin_expect (u2 > 0xdfff, 0)) \ { \ /* This is no valid second word for a surrogate. */ \ inptr -= 2; \ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ } \ \ put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \ inptr += 2; \ } \ } \ else \ { \ if (__builtin_expect (u1 < 0xd800, 1) || u1 > 0xdfff) \ { \ /* No surrogate. */ \ put32 (outptr, u1); \ inptr += 2; \ } \ else \ { \ /* It's a surrogate character. At least the first word says \ it is. */ \ if (__builtin_expect (inptr + 4 > inend, 0)) \ { \ /* We don't have enough input for another complete input \ character. */ \ result = __GCONV_INCOMPLETE_INPUT; \ break; \ } \ \ inptr += 2; \ uint16_t u2 = get16 (inptr); \ if (__builtin_expect (u2 < 0xdc00, 0) \ || __builtin_expect (u2 > 0xdfff, 0)) \ { \ /* This is no valid second word for a surrogate. 
*/ \ inptr -= 2; \ STANDARD_FROM_LOOP_ERR_HANDLER (2); \ } \ \ put32 (outptr, ((u1 - 0xd7c0) << 10) + (u2 - 0xdc00)); \ inptr += 2; \ } \ } \ outptr += 4; \ } #define LOOP_NEED_FLAGS #define EXTRA_LOOP_DECLS \ , int swap #include <iconv/loop.c> /* Now define the toplevel functions. */ #include <iconv/skeleton.c>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
You can’t perform that action at this time.