Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Fix sorting order for Ukrainian locale (BZ 17293)
The introduction to the official orthography rules for the Ukrainian
language (http://spelling.ulif.org.ua/peredmova.htm) notes that only the
apostrophe does not affect the order of words when sorting.
As can be seen from the official alphabet, the soft sign
(U+044C/U+042C) has its own fixed position and thus affects the order;
likewise, the letters "е" and "є" (CYR-IE: U+0435/U+0415 and UKR-IE:
U+0454/U+0404) have their own positions and should each have a separate
place when sorting.
This also corresponds to the official Unicode collation chart for these
letters: http://unicode.org/charts/collation/chart_Cyrillic.html
  • Loading branch information
Andriy Rysin authored and Siddhesh Poyarekar committed May 26, 2015
1 parent f09b861 commit 6afb9c0
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 73 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
@@ -1,3 +1,8 @@
2015-05-26 Andriy Rysin <arysin@gmail.com>

[BZ #17293]
* uk_UA: Fix sorting order for Ukrainian locale

2015-05-26 Marko Myllynen <myllynen@redhat.com>

* stdlib/monetary.h: Fix comment.
Expand Down
18 changes: 9 additions & 9 deletions NEWS
Expand Up @@ -11,15 +11,15 @@ Version 2.22

438, 4719, 6792, 13028, 13064, 14094, 14841, 14906, 15319, 15467, 15790,
15969, 16159, 16339, 16351, 16352, 16512, 16560, 16704, 16783, 16850,
17053, 17090, 17195, 17269, 17523, 17542, 17569, 17581, 17588, 17596,
17620, 17621, 17628, 17631, 17692, 17711, 17715, 17776, 17779, 17792,
17836, 17912, 17916, 17930, 17932, 17944, 17949, 17964, 17965, 17967,
17969, 17978, 17987, 17991, 17996, 17998, 17999, 18007, 18019, 18020,
18029, 18030, 18032, 18036, 18038, 18039, 18042, 18043, 18046, 18047,
18049, 18068, 18080, 18093, 18100, 18104, 18110, 18111, 18125, 18128,
18138, 18185, 18196, 18197, 18206, 18210, 18211, 18217, 18220, 18221,
18234, 18244, 18247, 18287, 18319, 18333, 18346, 18397, 18409, 18410,
18412, 18418, 18434, 18444.
17053, 17090, 17195, 17269, 17293, 17523, 17542, 17569, 17581, 17588,
17596, 17620, 17621, 17628, 17631, 17692, 17711, 17715, 17776, 17779,
17792, 17836, 17912, 17916, 17930, 17932, 17944, 17949, 17964, 17965,
17967, 17969, 17978, 17987, 17991, 17996, 17998, 17999, 18007, 18019,
18020, 18029, 18030, 18032, 18036, 18038, 18039, 18042, 18043, 18046,
18047, 18049, 18068, 18080, 18093, 18100, 18104, 18110, 18111, 18125,
18128, 18138, 18185, 18196, 18197, 18206, 18210, 18211, 18217, 18220,
18221, 18234, 18244, 18247, 18287, 18319, 18333, 18346, 18397, 18409,
18410, 18412, 18418, 18434, 18444.

* Cache information can be queried via sysconf() function on s390 e.g. with
_SC_LEVEL1_ICACHE_SIZE as argument.
Expand Down
4 changes: 2 additions & 2 deletions localedata/Makefile
Expand Up @@ -37,7 +37,7 @@ test-srcs := collate-test xfrm-test tst-fmon tst-rpmatch tst-trans \
tst-ctype tst-langinfo tst-langinfo-static tst-numeric
test-input := de_DE.ISO-8859-1 en_US.ISO-8859-1 da_DK.ISO-8859-1 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 tr_TR.UTF-8 fr_FR.UTF-8 \
si_LK.UTF-8
si_LK.UTF-8 uk_UA.UTF-8
test-input-data = $(addsuffix .in, $(basename $(test-input)))
test-output := $(foreach s, .out .xout, \
$(addsuffix $s, $(basename $(test-input))))
Expand Down Expand Up @@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
tr_TR.ISO-8859-9 en_GB.UTF-8
tr_TR.ISO-8859-9 en_GB.UTF-8 uk_UA.UTF-8
include ../gen-locales.mk
endif

Expand Down
66 changes: 4 additions & 62 deletions localedata/locales/uk_UA
Expand Up @@ -340,70 +340,14 @@ copy "<U0069><U0073><U006F><U0031><U0034><U0036><U0035><U0031><U005F><U0074><U00
% Ukrainian ghe is missing in iso14651_t1
collating-symbol <UKR-GHE>

% Soft sign and apostrophe must be ignored during sorting because they are
% just signs, not real letters.
% Apostrophe must be ignored during sorting because it's just a sign, not a
% real letter.
% ( "<U006E><U0060>"=="<U006E>", "<U0027><U0079><U0061>"=="<U0079><U0061>", etc. )
%
% The apostrophe is already ignored by iso14651_t1.
%
% Soft sign '<U044C>' may follow only this set of nine characters [<U0432><U0434><U0437><U043B><U043D><U0440><U0441><U0442><U0446>].
% It only softens the pronunciation of these characters, so it should not impact
% sorting.


collating-symbol <V+SS>
collating-element <V-SS> from "<U0412><U042C>"
collating-element <V-ss> from "<U0412><U044C>"
collating-element <v-SS> from "<U0432><U042C>"
collating-element <v-ss> from "<U0432><U044C>"

collating-symbol <D+SS>
collating-element <D-SS> from "<U0414><U042C>"
collating-element <D-ss> from "<U0414><U044C>"
collating-element <d-SS> from "<U0434><U042C>"
collating-element <d-ss> from "<U0434><U044C>"

collating-symbol <Z+SS>
collating-element <Z-SS> from "<U0417><U042C>"
collating-element <Z-ss> from "<U0417><U044C>"
collating-element <z-SS> from "<U0437><U042C>"
collating-element <z-ss> from "<U0437><U044C>"

collating-symbol <L+SS>
collating-element <L-SS> from "<U041B><U042C>"
collating-element <L-ss> from "<U041B><U044C>"
collating-element <l-SS> from "<U043B><U042C>"
collating-element <l-ss> from "<U043B><U044C>"

collating-symbol <N+SS>
collating-element <N-SS> from "<U041D><U042C>"
collating-element <N-ss> from "<U041D><U044C>"
collating-element <n-SS> from "<U043D><U042C>"
collating-element <n-ss> from "<U043D><U044C>"

collating-symbol <R+SS>
collating-element <R-SS> from "<U0420><U042C>"
collating-element <R-ss> from "<U0420><U044C>"
collating-element <r-SS> from "<U0440><U042C>"
collating-element <r-ss> from "<U0440><U044C>"

collating-symbol <S+SS>
collating-element <S-SS> from "<U0421><U042C>"
collating-element <S-ss> from "<U0421><U044C>"
collating-element <s-SS> from "<U0441><U042C>"
collating-element <s-ss> from "<U0441><U044C>"

collating-symbol <T+SS>
collating-element <T-SS> from "<U0422><U042C>"
collating-element <T-ss> from "<U0422><U044C>"
collating-element <t-SS> from "<U0442><U042C>"
collating-element <t-ss> from "<U0442><U044C>"

collating-symbol <TSE+SS>
collating-element <TS-SS> from "<U0426><U042C>"
collating-element <TS-ss> from "<U0426><U044C>"
collating-element <ts-SS> from "<U0446><U042C>"
collating-element <ts-ss> from "<U0446><U044C>"
% In the official alphabet the soft sign is a letter and has a fixed position in
% the order.


collating-symbol <CAP-MIN>
Expand Down Expand Up @@ -489,11 +433,9 @@ reorder-after <U0434>
<U0455> "<U003C><U0043><U0059><U0052><U002D><U0044><U0045><U003E><U003C><U0043><U0059><U0052><U002D><U005A><U0045><U003E>";"<U003C><U004C><U0049><U0047><U003E><U003C><U004C><U0049><U0047><U003E>";"<U003C><U004D><U0049><U004E><U003E><U003C><U004D><U0049><U004E><U003E>";IGNORE % CYR-DZE

reorder-after <U0435>
<U0454> <CYR-IE>;<UKR-IE>;<MIN>;IGNORE
<U0451> <CYR-IE>;<CYR-IO>;<MIN>;IGNORE
<U044D> <CYR-IE>;<CYR-E>;<MIN>;IGNORE
reorder-after <U0415>
<U0404> <CYR-IE>;<UKR-IE>;<CAP>;IGNORE
<U0401> <CYR-IE>;<CYR-IO>;<CAP>;IGNORE
<U042D> <CYR-IE>;<CYR-E>;<CAP>;IGNORE

Expand Down
56 changes: 56 additions & 0 deletions localedata/uk_UA.in
@@ -0,0 +1,56 @@
01010
Абажур
абажур
абажур-10
брама
вермішель
грати
Граття
граття
ґрати
ебонітовий
експорт
експосол
екс-посол
експоцентр
експрацівник
екс-працівник
еластичність
електрика
ельбор
елюент
епатаж
євгеніка
Європа
єдність
Жмих
жмих
зоря
и
і
ї
й
Карпати
криниця
лебідь
місяцевий
місяць
наразі
обапіл
об'їзд
об’їзд
обʼїзд
образ
опір
право
сонце
тарган
упродовж
фантастика
центр
чухатися
ш
щ
ь
ю
я

0 comments on commit 6afb9c0

Please sign in to comment.