diff --git a/ChangeLog b/ChangeLog index 165f1bca35..b9633625be 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2000-06-17 Ulrich Drepper + + * iconv/gconv_trans.c: Implement handling of translit_ignore. + * locale/langinfo.h: Add entries for translit_ignore information. + * locale/categories.def: Add entries for new LC_CTYPE elements. + * locale/C-ctype.c: Add initializers for new fields. Use NULL + pointer instead of "" where possible. + * locale/programs/ld-ctype.c: Write out translit_ignore information. + * intl/Depend: Add localedata. + * intl/tst-gettext.c: Call setlocale for LC_CTYPE. + * intl/tst-gettext.sh: Set LOCPATH to localedata build dir. + 2000-06-16 Ulrich Drepper * locale/langinfo.h: Add entries for default_missing information. diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c index 0bb5f00c07..269917b531 100644 --- a/iconv/gconv_trans.c +++ b/iconv/gconv_trans.c @@ -123,6 +123,7 @@ __gconv_transliterate (struct __gconv_step *step, { *inbufp += cnt * sizeof (uint32_t); ++*irreversible; + res = __GCONV_OK; } return res; @@ -135,6 +136,11 @@ __gconv_transliterate (struct __gconv_step *step, /* Nothing found, continue searching. */ } + else if (cnt > 0) + /* This means that the input buffer contents matches a prefix of + an entry. Since we cannot match it unless we get more input, + we will tell the caller about it. */ + return __GCONV_INCOMPLETE_INPUT; if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt]) low = idx; @@ -142,8 +148,37 @@ __gconv_transliterate (struct __gconv_step *step, high = idx; } - /* One last chance: use the default replacement. */ no_rules: + /* Maybe the character is supposed to be ignored. 
*/ + if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN) != 0) + { + int n = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_IGNORE_LEN); + uint32_t *ranges = (uint32_t *) _NL_CURRENT (LC_CTYPE, + _NL_CTYPE_TRANSLIT_IGNORE); + uint32_t wc = *(uint32_t *) (*inbufp); + int i; + + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + + for (i = 0; i < n; ranges += 3, ++i) + if (ranges[0] <= wc && wc <= ranges[1] + && (wc - ranges[0]) % ranges[2] == 0) + { + /* Matches the range. Ignore it. */ + *inbufp += 4; + ++*irreversible; + return __GCONV_OK; + } + else if (wc < ranges[0]) + /* There cannot be any other matching range since they are + sorted. */ + break; + } + + /* One last chance: use the default replacement. */ default_missing = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING); if (default_missing[0] != L'\0') @@ -153,6 +188,11 @@ __gconv_transliterate (struct __gconv_step *step, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN); int res; + /* Test whether there is enough input. */ + if (winbuf + 1 > winbufend) + return (winbuf == winbufend + ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT); + res = DL_CALL_FCT (step->__fct, (step, step_data, &toinptr, (const unsigned char *) (default_missing + len), @@ -165,9 +205,10 @@ __gconv_transliterate (struct __gconv_step *step, input buffer. */ if (res == __GCONV_EMPTY_INPUT) { - /* We consuming one character. */ - ++*inbufp; + /* This worked but is not reversible. */ ++*irreversible; + *inbufp += 4; + res = __GCONV_OK; } return res; diff --git a/intl/Depend b/intl/Depend index e35d3258a3..981111e299 100644 --- a/intl/Depend +++ b/intl/Depend @@ -1 +1,2 @@ iconvdata +localedata diff --git a/intl/tst-gettext.c b/intl/tst-gettext.c index b897d71582..d9f97187f7 100644 --- a/intl/tst-gettext.c +++ b/intl/tst-gettext.c @@ -18,6 +18,7 @@ Boston, MA 02111-1307, USA. 
*/ #include +#include <locale.h> #include #include #include @@ -70,6 +71,7 @@ main (int argc, char *argv[]) setenv ("LC_MESSAGES", "non-existing-locale", 1); setenv ("LC_CTYPE", "non-existing-locale", 1); setenv ("LANG", "non-existing-locale", 1); + setlocale (LC_CTYPE, "de_DE"); unsetenv ("OUTPUT_CHARSET"); /* This is the name of the existing domain with a catalog for the LC_MESSAGES category. */ diff --git a/intl/tst-gettext.sh b/intl/tst-gettext.sh index 9d2151f6b6..9fb8b432c7 100755 --- a/intl/tst-gettext.sh +++ b/intl/tst-gettext.sh @@ -23,6 +23,8 @@ objpfx=$2 GCONV_PATH=${common_objpfx}iconvdata export GCONV_PATH +LOCPATH=${common_objpfx}localedata +export LOCPATH # Generate the test data. test -d ${objpfx}domaindir || mkdir ${objpfx}domaindir diff --git a/locale/C-ctype.c b/locale/C-ctype.c index 04eb091e62..344596d972 100644 --- a/locale/C-ctype.c +++ b/locale/C-ctype.c @@ -345,7 +345,7 @@ const struct locale_data _nl_C_LC_CTYPE = UNDELETABLE, 0, NULL, - 64, + 66, { { string: _nl_C_LC_CTYPE_class }, { string: (const char *) _nl_C_LC_CTYPE_toupper }, @@ -419,11 +419,13 @@ const struct locale_data _nl_C_LC_CTYPE = { word: L'9' }, { word: 0 }, { word: 0 }, - { string: "" }, - { string: "" }, - { string: "" }, - { string: "" }, + { string: NULL }, + { string: NULL }, + { string: NULL }, + { string: NULL }, + { word: 1 }, { wstr: (uint32_t *) L"?" 
}, - { word: 1 } + { word: 0 }, + { wstr: NULL } } }; diff --git a/locale/categories.def b/locale/categories.def index e1f3f054b9..b71d44469b 100644 --- a/locale/categories.def +++ b/locale/categories.def @@ -132,8 +132,10 @@ DEFINE_CATEGORY DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_TBL, "ctype-translit-from-tbl", std, string) DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_IDX, "ctype-translit-to-idx", std, string) DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_TBL, "ctype-translit-to-tbl", std, string) - DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING, "ctype-translit-default-missing", std, string) DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN, "ctype-translit-default-missing-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING, "ctype-translit-default-missing", std, string) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE_LEN, "ctype-translit-ignore-len", std, word) + DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE, "ctype-translit-ignore", std, string) ), _nl_postload_ctype) diff --git a/locale/langinfo.h b/locale/langinfo.h index 11ab68ef63..582253c9cc 100644 --- a/locale/langinfo.h +++ b/locale/langinfo.h @@ -319,8 +319,10 @@ enum _NL_CTYPE_TRANSLIT_FROM_TBL, _NL_CTYPE_TRANSLIT_TO_IDX, _NL_CTYPE_TRANSLIT_TO_TBL, - _NL_CTYPE_TRANSLIT_DEFAULT_MISSING, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN, + _NL_CTYPE_TRANSLIT_DEFAULT_MISSING, + _NL_CTYPE_TRANSLIT_IGNORE_LEN, + _NL_CTYPE_TRANSLIT_IGNORE, _NL_NUM_LC_CTYPE, /* LC_MONETARY category: formatting of monetary quantities. 
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index e899649d65..bda89ec9bc 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -154,6 +154,7 @@ struct locale_ctype_t const char *translit_copy_repertoire; struct translit_t *translit; struct translit_ignore_t *translit_ignore; + uint32_t ntranslit_ignore; uint32_t *default_missing; const char *default_missing_file; @@ -774,6 +775,33 @@ not all characters used in `outdigit' are available in the repertoire")); ctype->wcoutdigits[cnt] = L'?'; } + + /* Sort the entries in the translit_ignore list. */ + if (ctype->translit_ignore != NULL) + { + struct translit_ignore_t *firstp = ctype->translit_ignore; + struct translit_ignore_t *runp; + + ctype->ntranslit_ignore = 1; + + for (runp = firstp->next; runp != NULL; runp = runp->next) + { + struct translit_ignore_t *lastp = NULL; + struct translit_ignore_t *cmpp; + + ++ctype->ntranslit_ignore; + + for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next) + if (runp->from < cmpp->from) + break; + + runp->next = lastp; + if (lastp == NULL) + firstp = runp; + } + + ctype->translit_ignore = firstp; + } } @@ -1007,6 +1035,15 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap, idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; break; + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN): + default_missing_len = (ctype->default_missing + ? 
wcslen ((wchar_t *)ctype->default_missing) + : 1); + iov[2 + elem + offset].iov_base = &default_missing_len; + iov[2 + elem + offset].iov_len = sizeof (uint32_t); + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + break; + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING): iov[2 + elem + offset].iov_base = ctype->default_missing ?: (uint32_t *) L""; @@ -1015,12 +1052,30 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap, idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; break; - case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN): - default_missing_len = (ctype->default_missing - ? wcslen ((wchar_t *)ctype->default_missing) - : 1); - iov[2 + elem + offset].iov_base = &default_missing_len; + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN): + iov[2 + elem + offset].iov_base = &ctype->ntranslit_ignore; iov[2 + elem + offset].iov_len = sizeof (uint32_t); + idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE): + { + uint32_t *ranges = (uint32_t *) alloca (ctype->ntranslit_ignore + * 3 * sizeof (uint32_t)); + struct translit_ignore_t *runp; + + iov[2 + elem + offset].iov_base = ranges; + iov[2 + elem + offset].iov_len = (ctype->ntranslit_ignore + * 3 * sizeof (uint32_t)); + + for (runp = ctype->translit_ignore; runp != NULL; + runp = runp->next) + { + *ranges++ = runp->from; + *ranges++ = runp->to; + *ranges++ = runp->step; + } + } /* Remove the following line in case a new entry is added after _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN. */ if (elem < nelems)