From 6afb9c0175006c8060928537842364f83df6fc15 Mon Sep 17 00:00:00 2001 From: Andriy Rysin Date: Tue, 26 May 2015 23:51:18 +0530 Subject: [PATCH] Fix sorting order for Ukrainian locale (BZ 17293) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the introduction to the official orthography rules for the Ukrainian language (http://spelling.ulif.org.ua/peredmova.htm) there is a note that only the apostrophe does not affect the order of words when sorting. As can be seen from the official alphabet, the soft sign (U+044C/U+042C) has its own fixed position and thus affects the order; likewise the letters "е" and "є" (CYR-IE: U+0435/U+0415 and UKR-IE: U+0454/U+0404) have their own positions and should be sorted as separate letters. This also corresponds to the official Unicode collation chart for these letters: http://unicode.org/charts/collation/chart_Cyrillic.html --- ChangeLog | 5 +++ NEWS | 18 +++++------ localedata/Makefile | 4 +-- localedata/locales/uk_UA | 66 +++------------------------------------- localedata/uk_UA.in | 56 ++++++++++++++++++++++++++++++++++ 5 files changed, 76 insertions(+), 73 deletions(-) create mode 100644 localedata/uk_UA.in diff --git a/ChangeLog b/ChangeLog index eb029730d6..81ce22e61b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2015-05-26 Andriy Rysin + + [BZ #17293] + * uk_UA: Fix sorting order for Ukrainian locale + 2015-05-26 Marko Myllynen * stdlib/monetary.h: Fix comment. 
diff --git a/NEWS b/NEWS index d50560a37d..4aff69d3e5 100644 --- a/NEWS +++ b/NEWS @@ -11,15 +11,15 @@ Version 2.22 438, 4719, 6792, 13028, 13064, 14094, 14841, 14906, 15319, 15467, 15790, 15969, 16159, 16339, 16351, 16352, 16512, 16560, 16704, 16783, 16850, - 17053, 17090, 17195, 17269, 17523, 17542, 17569, 17581, 17588, 17596, - 17620, 17621, 17628, 17631, 17692, 17711, 17715, 17776, 17779, 17792, - 17836, 17912, 17916, 17930, 17932, 17944, 17949, 17964, 17965, 17967, - 17969, 17978, 17987, 17991, 17996, 17998, 17999, 18007, 18019, 18020, - 18029, 18030, 18032, 18036, 18038, 18039, 18042, 18043, 18046, 18047, - 18049, 18068, 18080, 18093, 18100, 18104, 18110, 18111, 18125, 18128, - 18138, 18185, 18196, 18197, 18206, 18210, 18211, 18217, 18220, 18221, - 18234, 18244, 18247, 18287, 18319, 18333, 18346, 18397, 18409, 18410, - 18412, 18418, 18434, 18444. + 17053, 17090, 17195, 17269, 17293, 17523, 17542, 17569, 17581, 17588, + 17596, 17620, 17621, 17628, 17631, 17692, 17711, 17715, 17776, 17779, + 17792, 17836, 17912, 17916, 17930, 17932, 17944, 17949, 17964, 17965, + 17967, 17969, 17978, 17987, 17991, 17996, 17998, 17999, 18007, 18019, + 18020, 18029, 18030, 18032, 18036, 18038, 18039, 18042, 18043, 18046, + 18047, 18049, 18068, 18080, 18093, 18100, 18104, 18110, 18111, 18125, + 18128, 18138, 18185, 18196, 18197, 18206, 18210, 18211, 18217, 18220, + 18221, 18234, 18244, 18247, 18287, 18319, 18333, 18346, 18397, 18409, + 18410, 18412, 18418, 18434, 18444. * Cache information can be queried via sysconf() function on s390 e.g. with _SC_LEVEL1_ICACHE_SIZE as argument. 
diff --git a/localedata/Makefile b/localedata/Makefile index 305c87f9d3..ebf6ac99d9 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -37,7 +37,7 @@ test-srcs := collate-test xfrm-test tst-fmon tst-rpmatch tst-trans \ tst-ctype tst-langinfo tst-langinfo-static tst-numeric test-input := de_DE.ISO-8859-1 en_US.ISO-8859-1 da_DK.ISO-8859-1 \ hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 tr_TR.UTF-8 fr_FR.UTF-8 \ - si_LK.UTF-8 + si_LK.UTF-8 uk_UA.UTF-8 test-input-data = $(addsuffix .in, $(basename $(test-input))) test-output := $(foreach s, .out .xout, \ $(addsuffix $s, $(basename $(test-input)))) @@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \ hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \ nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \ zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \ - tr_TR.ISO-8859-9 en_GB.UTF-8 + tr_TR.ISO-8859-9 en_GB.UTF-8 uk_UA.UTF-8 include ../gen-locales.mk endif diff --git a/localedata/locales/uk_UA b/localedata/locales/uk_UA index d9194b82c2..511f004883 100644 --- a/localedata/locales/uk_UA +++ b/localedata/locales/uk_UA @@ -340,70 +340,14 @@ copy " -% Soft sign and apostrophe must be ignored during sorting because they are -% just signs, not real letters. +% Apostrophe must be ignored during sorting because it's just a sign, not a +% real letter. % ( ""=="", ""=="", etc. ) % % Apostrophe already ignored by iso14651_t1. % -% Soft sign '' may follow only this set of nine characters []. -% It only softens pronunciation of these characters so it's should not impact -% sorting. 
- - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" - -collating-symbol -collating-element from "" -collating-element from "" -collating-element from "" -collating-element from "" +% In the official alphabet the soft sign is a letter and has a hard position in +% the order. 
collating-symbol @@ -489,11 +433,9 @@ reorder-after "";"";"";IGNORE % CYR-DZE reorder-after - ;;;IGNORE ;;;IGNORE ;;;IGNORE reorder-after - ;;;IGNORE ;;;IGNORE ;;;IGNORE diff --git a/localedata/uk_UA.in b/localedata/uk_UA.in new file mode 100644 index 0000000000..ff4d284d61 --- /dev/null +++ b/localedata/uk_UA.in @@ -0,0 +1,56 @@ +01010 +Абажур +абажур +абажур-10 +брама +вермішель +грати +Граття +граття +ґрати +ебонітовий +експорт +експосол +екс-посол +експоцентр +експрацівник +екс-працівник +еластичність +електрика +ельбор +елюент +епатаж +євгеніка +Європа +єдність +Жмих +жмих +зоря +и +і +ї +й +Карпати +криниця +лебідь +місяцевий +місяць +наразі +обапіл +об'їзд +об’їзд +обʼїзд +образ +опір +право +сонце +тарган +упродовж +фантастика +центр +чухатися +ш +щ +ь +ю +я