diff --git a/ChangeLog b/ChangeLog
index a9d575f2cb..7f2a19d50a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2016-01-15  Martin Sebor
+
+	[BZ #19432]
+	* iconvdata/Makefile: Add bug-iconv11.
+	* iconvdata/bug-iconv11.c: New test.
+	* iconvdata/ibm930.c: Do not reject redundant shift sequences.
+	* iconvdata/ibm933.c: Same.
+	* iconvdata/ibm935.c: Same.
+	* iconvdata/ibm937.c: Same.
+	* iconvdata/ibm939.c: Same.
+
 2016-01-15  Martin Sebor
 
 	[BZ #19443]
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
index ae5dde60b4..357530b558 100644
--- a/iconvdata/Makefile
+++ b/iconvdata/Makefile
@@ -68,7 +68,7 @@ modules.so := $(addsuffix .so, $(modules))
 ifeq (yes,$(build-shared))
 tests	= bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
 	  tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
-	  bug-iconv10
+	  bug-iconv10 bug-iconv11
 ifeq ($(have-thread-library),yes)
 tests += bug-iconv3
 endif
diff --git a/iconvdata/bug-iconv11.c b/iconvdata/bug-iconv11.c
new file mode 100644
index 0000000000..6cdc07d798
--- /dev/null
+++ b/iconvdata/bug-iconv11.c
@@ -0,0 +1,125 @@
+/* bug 19432: iconv rejects redundant escape sequences in IBM930,
+   IBM933, IBM935, IBM937, and IBM939
+
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <errno.h>
+#include <iconv.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+// The longest test input sequence.
+#define MAXINBYTES 8
+#define MAXOUTBYTES (MAXINBYTES * MB_LEN_MAX)
+
+/* Verify that a conversion of the INPUT sequence consisting of
+   INBYTESLEFT bytes in the encoding specified by the codeset
+   named by FROM_SET is successful.
+   Return 0 on success, non-zero on iconv() failure.  */
+
+static int
+test_ibm93x (const char *from_set, const char *input, size_t inbytesleft)
+{
+  const char to_set[] = "UTF-8";
+  iconv_t cd = iconv_open (to_set, from_set);
+  if (cd == (iconv_t) -1)
+    {
+      /* Print the names in the order they were passed to iconv_open.  */
+      printf ("iconv_open(\"%s\", \"%s\"): %s\n",
+              to_set, from_set, strerror (errno));
+      return 1;
+    }
+
+  char output [MAXOUTBYTES];
+  size_t outbytesleft = sizeof output;
+
+  /* iconv declares its input pointer as char**, hence the cast.  */
+  char *inbuf = (char*)input;
+  char *outbuf = output;
+
+  printf ("iconv(cd, %p, %zu, %p, %zu)\n",
+          (void*)inbuf, inbytesleft, (void*)outbuf, outbytesleft);
+
+  errno = 0;
+  size_t ret = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
+
+  /* Save errno now: the printf and iconv_close calls below are free
+     to change it even when they succeed.  */
+  const int err = errno;
+
+  printf (" ==> %td: %s\n"
+          " inbuf%+td, inbytesleft=%zu, outbuf%+td, outbytesleft=%zu\n",
+          (ptrdiff_t)ret, strerror (err),
+          inbuf - input, inbytesleft, outbuf - output, outbytesleft);
+
+  iconv_close (cd);
+
+  // Return 0 on success, non-zero on iconv() failure.
+  return ret == (size_t)-1 || err != 0;
+}
+
+static int
+do_test (void)
+{
+  // State-dependent encodings to exercise.
+  static const char* const to_code[] = {
+    "IBM930", "IBM933", "IBM935", "IBM937", "IBM939"
+  };
+
+  static const size_t ncodesets = sizeof to_code / sizeof *to_code;
+
+  static const struct {
+    char txt[MAXINBYTES];
+    size_t len;
+  } input[] = {
+#define DATA(s) { s, sizeof s - 1 }
+    /* <SO>: denotes the shift-out 1-byte escape sequence (0x0e), changing
+             the decoder from a single-byte to a multibyte state
+       <SI>: denotes the shift-in 1-byte escape sequence (0x0f), switching
+             the decoder from a multibyte to a single-byte state
+       <G>:  denotes the single-byte character 0xc7  */
+
+    DATA ("\x0e"),               // <SO> (not redundant)
+    DATA ("\x0f"),               // <SI> (redundant with initial state)
+    DATA ("\x0e\x0e"),           // <SO><SO>
+    DATA ("\x0e\x0f\x0f"),       // <SO><SI><SI>
+    DATA ("\x0f\x0f"),           // <SI><SI>
+    DATA ("\x0f\x0e\x0e"),       // <SI><SO><SO>
+    DATA ("\x0e\x0f\xc7\x0f"),   // <SO><SI><G><SI>
+    DATA ("\xc7\x0f")            // <G><SI> (redundant with initial state)
+  };
+
+  static const size_t ninputs = sizeof input / sizeof *input;
+
+  int ret = 0;
+
+  size_t i, j;
+
+  /* Iterate over the IBM93x codesets above and exercise each with
+     every one of the input sequences above (I: codeset, J: input).  */
+  for (i = 0; i != ncodesets; ++i)
+    for (j = 0; j != ninputs; ++j)
+      ret += test_ibm93x (to_code [i], input [j].txt, input [j].len);
+
+  return ret;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c
index 1f60962d86..9d35734b3a 100644
--- a/iconvdata/ibm930.c
+++ b/iconvdata/ibm930.c
@@ -110,24 +110,14 @@ enum
                                                                              \
     if (__builtin_expect (ch, 0) == SO)                                      \
       {                                                                      \
-        /* Shift OUT, change to DBCS converter.  */                          \
-        if (curcs == db)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift OUT, change to DBCS converter (redundant escape okay).  */  \
         curcs = db;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
       }                                                                      \
     else if (__builtin_expect (ch, 0) == SI)                                 \
       {                                                                      \
-        /* Shift IN, change to SBCS converter */                             \
-        if (curcs == sb)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift IN, change to SBCS converter (redundant escape okay).  */   \
         curcs = sb;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c
index c984cd1733..669e357066 100644
--- a/iconvdata/ibm933.c
+++ b/iconvdata/ibm933.c
@@ -108,24 +108,14 @@ enum
                                                                              \
     if (__builtin_expect (ch, 0) == SO)                                      \
       {                                                                      \
-        /* Shift OUT, change to DBCS converter.  */                          \
-        if (curcs == db)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift OUT, change to DBCS converter (redundant escape okay).  */  \
         curcs = db;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
       }                                                                      \
     else if (__builtin_expect (ch, 0) == SI)                                 \
       {                                                                      \
-        /* Shift IN, change to SBCS converter.  */                           \
-        if (curcs == sb)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift IN, change to SBCS converter (redundant escape okay).  */   \
         curcs = sb;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c
index 07f4bf2558..2d3065e54c 100644
--- a/iconvdata/ibm935.c
+++ b/iconvdata/ibm935.c
@@ -109,24 +109,14 @@ enum
                                                                              \
     if (__builtin_expect(ch, 0) == SO)                                       \
       {                                                                      \
-        /* Shift OUT, change to DBCS converter.  */                          \
-        if (curcs == db)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift OUT, change to DBCS converter (redundant escape okay).  */  \
         curcs = db;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
       }                                                                      \
     else if (__builtin_expect (ch, 0) == SI)                                 \
       {                                                                      \
-        /* Shift IN, change to SBCS converter.  */                           \
-        if (curcs == sb)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift IN, change to SBCS converter (redundant escape okay).  */   \
         curcs = sb;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c
index 88344a445b..3ed6479366 100644
--- a/iconvdata/ibm937.c
+++ b/iconvdata/ibm937.c
@@ -109,24 +109,14 @@ enum
                                                                              \
     if (__builtin_expect (ch, 0) == SO)                                      \
       {                                                                      \
-        /* Shift OUT, change to DBCS converter.  */                          \
-        if (curcs == db)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift OUT, change to DBCS converter (redundant escape okay).  */  \
         curcs = db;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
       }                                                                      \
     else if (__builtin_expect (ch, 0) == SI)                                 \
       {                                                                      \
-        /* Shift IN, change to SBCS converter.  */                           \
-        if (curcs == sb)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift IN, change to SBCS converter (redundant escape okay).  */   \
         curcs = sb;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c
index dbdda628a3..c0a75f7797 100644
--- a/iconvdata/ibm939.c
+++ b/iconvdata/ibm939.c
@@ -109,24 +109,14 @@ enum
                                                                              \
     if (__builtin_expect (ch, 0) == SO)                                      \
       {                                                                      \
-        /* Shift OUT, change to DBCS converter.  */                          \
-        if (curcs == db)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift OUT, change to DBCS converter (redundant escape okay).  */  \
         curcs = db;                                                          \
         ++inptr;                                                             \
         continue;                                                            \
       }                                                                      \
     else if (__builtin_expect (ch, 0) == SI)                                 \
       {                                                                      \
-        /* Shift IN, change to SBCS converter.  */                           \
-        if (curcs == sb)                                                     \
-          {                                                                  \
-            result = __GCONV_ILLEGAL_INPUT;                                  \
-            break;                                                           \
-          }                                                                  \
+        /* Shift IN, change to SBCS converter (redundant escape okay).  */   \
         curcs = sb;                                                          \
         ++inptr;                                                             \
         continue;                                                            \