add nfc, nfd normalization tsv files

This commit is contained in:
Taku Kudo 2024-02-25 15:47:08 +00:00
parent a216bd01d1
commit 0ba506938c
14 changed files with 176702 additions and 97651 deletions

13655
data/nfc.tsv Normal file

File diff suppressed because it is too large Load Diff

18013
data/nfc_cf.tsv Normal file

File diff suppressed because it is too large Load Diff

13233
data/nfd.tsv Normal file

File diff suppressed because it is too large Load Diff

14306
data/nfd_cf.tsv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -155529,6 +155529,18 @@ FB9 F90 FB5 # ྐྵ => ྐྵ
A69C 44A # ꚜ => ъ
A69D 44C # ꚝ => ь
A770 A76F # ꝰ => ꝯ
A7F2 43 # ꟲ => C
A7F2 301 106 # ꟲ́ => Ć
A7F2 302 108 # ꟲ̂ => Ĉ
A7F2 307 10A # ꟲ̇ => Ċ
A7F2 30C 10C # ꟲ̌ => Č
A7F2 327 C7 # ꟲ̧ => Ç
A7F2 327 301 1E08 # ꟲ̧́ => Ḉ
A7F2 327 341 1E08 # ꟲ̧́ => Ḉ
A7F2 341 106 # ꟲ́ => Ć
A7F3 46 # ꟳ => F
A7F3 307 1E1E # ꟳ̇ => Ḟ
A7F4 51 # ꟴ => Q
A7F8 126 # ꟸ => Ħ
A7F9 153 # ꟹ => œ
AB5C A727 # ꭜ => ꜧ
@ -212805,6 +212817,67 @@ FFEB 338 219B # ↛ => ↛
FFEC 2193 # ↓ => ↓
FFED 25A0 # ■ => ■
FFEE 25CB # ○ => ○
10781 2D0 # 𐞁 => ː
10782 2D1 # 𐞂 => ˑ
10783 E6 # 𐞃 => æ
10783 301 1FD # 𐞃́ => ǽ
10783 304 1E3 # 𐞃̄ => ǣ
10783 341 1FD # 𐞃́ => ǽ
10784 299 # 𐞄 => ʙ
10785 253 # 𐞅 => ɓ
10787 2A3 # 𐞇 => ʣ
10788 AB66 # 𐞈 => ꭦ
10789 2A5 # 𐞉 => ʥ
1078A 2A4 # 𐞊 => ʤ
1078B 256 # 𐞋 => ɖ
1078C 257 # 𐞌 => ɗ
1078D 1D91 # 𐞍 => ᶑ
1078E 258 # 𐞎 => ɘ
1078F 25E # 𐞏 => ɞ
10790 2A9 # 𐞐 => ʩ
10791 264 # 𐞑 => ɤ
10792 262 # 𐞒 => ɢ
10793 260 # 𐞓 => ɠ
10794 29B # 𐞔 => ʛ
10795 127 # 𐞕 => ħ
10796 29C # 𐞖 => ʜ
10797 267 # 𐞗 => ɧ
10798 284 # 𐞘 => ʄ
10799 2AA # 𐞙 => ʪ
1079A 2AB # 𐞚 => ʫ
1079B 26C # 𐞛 => ɬ
1079C 1DF04 # 𐞜 => 𝼄
1079D A78E # 𐞝 => ꞎ
1079E 26E # 𐞞 => ɮ
1079F 1DF05 # 𐞟 => 𝼅
107A0 28E # 𐞠 => ʎ
107A1 1DF06 # 𐞡 => 𝼆
107A2 F8 # 𐞢 => ø
107A2 301 1FF # 𐞢́ => ǿ
107A2 341 1FF # 𐞢́ => ǿ
107A3 276 # 𐞣 => ɶ
107A4 277 # 𐞤 => ɷ
107A5 71 # 𐞥 => q
107A6 27A # 𐞦 => ɺ
107A7 1DF08 # 𐞧 => 𝼈
107A8 27D # 𐞨 => ɽ
107A9 27E # 𐞩 => ɾ
107AA 280 # 𐞪 => ʀ
107AB 2A8 # 𐞫 => ʨ
107AC 2A6 # 𐞬 => ʦ
107AD AB67 # 𐞭 => ꭧ
107AE 2A7 # 𐞮 => ʧ
107AF 288 # 𐞯 => ʈ
107B0 2C71 # 𐞰 => ⱱ
107B2 28F # 𐞲 => ʏ
107B3 2A1 # 𐞳 => ʡ
107B4 2A2 # 𐞴 => ʢ
107B5 298 # 𐞵 => ʘ
107B6 1C0 # 𐞶 => ǀ
107B7 1C1 # 𐞷 => ǁ
107B8 1C2 # 𐞸 => ǂ
107B9 1DF0A # 𐞹 => 𝼊
107BA 1DF1E # 𐞺 => 𝼞
11099 110BA 1109A # 𑂚 => 𑂚
1109B 110BA 1109C # 𑂜 => 𑂜
110A5 110BA 110AB # 𑂫 => 𑂫
@ -223525,6 +223598,123 @@ FFEE 25CB # ○ => ○
1D7FD 37 # 𝟽 => 7
1D7FE 38 # 𝟾 => 8
1D7FF 39 # 𝟿 => 9
1E030 430 # 𞀰 => а
1E030 306 4D1 # 𞀰̆ => ӑ
1E030 308 4D3 # 𞀰̈ => ӓ
1E031 431 # 𞀱 => б
1E032 432 # 𞀲 => в
1E033 433 # 𞀳 => г
1E033 301 453 # 𞀳́ => ѓ
1E033 341 453 # 𞀳́ => ѓ
1E034 434 # 𞀴 => д
1E035 435 # 𞀵 => е
1E035 300 450 # 𞀵̀ => ѐ
1E035 306 4D7 # 𞀵̆ => ӗ
1E035 308 451 # 𞀵̈ => ё
1E035 340 450 # 𞀵̀ => ѐ
1E036 436 # 𞀶 => ж
1E036 306 4C2 # 𞀶̆ => ӂ
1E036 308 4DD # 𞀶̈ => ӝ
1E037 437 # 𞀷 => з
1E037 308 4DF # 𞀷̈ => ӟ
1E038 438 # 𞀸 => и
1E038 300 45D # 𞀸̀ => ѝ
1E038 304 4E3 # 𞀸̄ => ӣ
1E038 306 439 # 𞀸̆ => й
1E038 308 4E5 # 𞀸̈ => ӥ
1E038 340 45D # 𞀸̀ => ѝ
1E039 43A # 𞀹 => к
1E039 301 45C # 𞀹́ => ќ
1E039 341 45C # 𞀹́ => ќ
1E03A 43B # 𞀺 => л
1E03B 43C # 𞀻 => м
1E03C 43E # 𞀼 => о
1E03C 308 4E7 # 𞀼̈ => ӧ
1E03D 43F # 𞀽 => п
1E03E 440 # 𞀾 => р
1E03F 441 # 𞀿 => с
1E040 442 # 𞁀 => т
1E041 443 # 𞁁 => у
1E041 304 4EF # 𞁁̄ => ӯ
1E041 306 45E # 𞁁̆ => ў
1E041 308 4F1 # 𞁁̈ => ӱ
1E041 30B 4F3 # 𞁁̋ => ӳ
1E042 444 # 𞁂 => ф
1E043 445 # 𞁃 => х
1E044 446 # 𞁄 => ц
1E045 447 # 𞁅 => ч
1E045 308 4F5 # 𞁅̈ => ӵ
1E046 448 # 𞁆 => ш
1E047 44B # 𞁇 => ы
1E047 308 4F9 # 𞁇̈ => ӹ
1E048 44D # 𞁈 => э
1E048 308 4ED # 𞁈̈ => ӭ
1E049 44E # 𞁉 => ю
1E04A A689 # 𞁊 => ꚉ
1E04B 4D9 # 𞁋 => ә
1E04B 308 4DB # 𞁋̈ => ӛ
1E04C 456 # 𞁌 => і
1E04C 308 457 # 𞁌̈ => ї
1E04D 458 # 𞁍 => ј
1E04E 4E9 # 𞁎 => ө
1E04E 308 4EB # 𞁎̈ => ӫ
1E04F 4AF # 𞁏 => ү
1E050 4CF # 𞁐 => ӏ
1E051 430 # 𞁑 => а
1E051 306 4D1 # 𞁑̆ => ӑ
1E051 308 4D3 # 𞁑̈ => ӓ
1E052 431 # 𞁒 => б
1E053 432 # 𞁓 => в
1E054 433 # 𞁔 => г
1E054 301 453 # 𞁔́ => ѓ
1E054 341 453 # 𞁔́ => ѓ
1E055 434 # 𞁕 => д
1E056 435 # 𞁖 => е
1E056 300 450 # 𞁖̀ => ѐ
1E056 306 4D7 # 𞁖̆ => ӗ
1E056 308 451 # 𞁖̈ => ё
1E056 340 450 # 𞁖̀ => ѐ
1E057 436 # 𞁗 => ж
1E057 306 4C2 # 𞁗̆ => ӂ
1E057 308 4DD # 𞁗̈ => ӝ
1E058 437 # 𞁘 => з
1E058 308 4DF # 𞁘̈ => ӟ
1E059 438 # 𞁙 => и
1E059 300 45D # 𞁙̀ => ѝ
1E059 304 4E3 # 𞁙̄ => ӣ
1E059 306 439 # 𞁙̆ => й
1E059 308 4E5 # 𞁙̈ => ӥ
1E059 340 45D # 𞁙̀ => ѝ
1E05A 43A # 𞁚 => к
1E05A 301 45C # 𞁚́ => ќ
1E05A 341 45C # 𞁚́ => ќ
1E05B 43B # 𞁛 => л
1E05C 43E # 𞁜 => о
1E05C 308 4E7 # 𞁜̈ => ӧ
1E05D 43F # 𞁝 => п
1E05E 441 # 𞁞 => с
1E05F 443 # 𞁟 => у
1E05F 304 4EF # 𞁟̄ => ӯ
1E05F 306 45E # 𞁟̆ => ў
1E05F 308 4F1 # 𞁟̈ => ӱ
1E05F 30B 4F3 # 𞁟̋ => ӳ
1E060 444 # 𞁠 => ф
1E061 445 # 𞁡 => х
1E062 446 # 𞁢 => ц
1E063 447 # 𞁣 => ч
1E063 308 4F5 # 𞁣̈ => ӵ
1E064 448 # 𞁤 => ш
1E065 44A # 𞁥 => ъ
1E066 44B # 𞁦 => ы
1E066 308 4F9 # 𞁦̈ => ӹ
1E067 491 # 𞁧 => ґ
1E068 456 # 𞁨 => і
1E068 308 457 # 𞁨̈ => ї
1E069 455 # 𞁩 => ѕ
1E06A 45F # 𞁪 => џ
1E06B 4AB # 𞁫 => ҫ
1E06C A651 # 𞁬 => ꙑ
1E06D 4B1 # 𞁭 => ұ
1EE00 627 # 𞸀 => ا
1EE00 653 622 # 𞸀ٓ => آ
1EE00 654 623 # 𞸀ٔ => أ

Can't render this file because it is too large.

View File

@ -59695,6 +59695,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ
2C2C 2C5C # Ⱜ => ⱜ
2C2D 2C5D # Ⱝ => ⱝ
2C2E 2C5E # Ⱞ => ⱞ
2C2F 2C5F # Ⱟ => ⱟ
2C60 2C61 # Ⱡ => ⱡ
2C62 26B # Ɫ => ɫ
2C63 1D7D # Ᵽ => ᵽ
@ -156530,12 +156531,28 @@ A7B8 A7B9 # Ꞹ => ꞹ
A7BA A7BB # Ꞻ => ꞻ
A7BC A7BD # Ꞽ => ꞽ
A7BE A7BF # Ꞿ => ꞿ
A7C0 A7C1 # Ꟁ => ꟁ
A7C2 A7C3 # Ꟃ => ꟃ
A7C4 A794 # Ꞔ => ꞔ
A7C5 282 # Ʂ => ʂ
A7C6 1D8E # Ᶎ => ᶎ
A7C7 A7C8 # Ꟈ => ꟈ
A7C9 A7CA # Ꟊ => ꟊ
A7D0 A7D1 # Ꟑ => ꟑ
A7D6 A7D7 # Ꟗ => ꟗ
A7D8 A7D9 # Ꟙ => ꟙ
A7F2 63 # ꟲ => c
A7F2 301 107 # ꟲ́ => ć
A7F2 302 109 # ꟲ̂ => ĉ
A7F2 307 10B # ꟲ̇ => ċ
A7F2 30C 10D # ꟲ̌ => č
A7F2 327 E7 # ꟲ̧ => ç
A7F2 327 301 1E09 # ꟲ̧́ => ḉ
A7F2 327 341 1E09 # ꟲ̧́ => ḉ
A7F2 341 107 # ꟲ́ => ć
A7F3 66 # ꟳ => f
A7F3 307 1E1F # ꟳ̇ => ḟ
A7F4 71 # ꟴ => q
A7F5 A7F6 # Ꟶ => ꟶ
A7F8 127 # ꟸ => ħ
A7F9 153 # ꟹ => œ
@ -213969,6 +213986,102 @@ FFEE 25CB # ○ => ○
104D1 104F9 # 𐓑 => 𐓹
104D2 104FA # 𐓒 => 𐓺
104D3 104FB # 𐓓 => 𐓻
10570 10597 # 𐕰 => 𐖗
10571 10598 # 𐕱 => 𐖘
10572 10599 # 𐕲 => 𐖙
10573 1059A # 𐕳 => 𐖚
10574 1059B # 𐕴 => 𐖛
10575 1059C # 𐕵 => 𐖜
10576 1059D # 𐕶 => 𐖝
10577 1059E # 𐕷 => 𐖞
10578 1059F # 𐕸 => 𐖟
10579 105A0 # 𐕹 => 𐖠
1057A 105A1 # 𐕺 => 𐖡
1057C 105A3 # 𐕼 => 𐖣
1057D 105A4 # 𐕽 => 𐖤
1057E 105A5 # 𐕾 => 𐖥
1057F 105A6 # 𐕿 => 𐖦
10580 105A7 # 𐖀 => 𐖧
10581 105A8 # 𐖁 => 𐖨
10582 105A9 # 𐖂 => 𐖩
10583 105AA # 𐖃 => 𐖪
10584 105AB # 𐖄 => 𐖫
10585 105AC # 𐖅 => 𐖬
10586 105AD # 𐖆 => 𐖭
10587 105AE # 𐖇 => 𐖮
10588 105AF # 𐖈 => 𐖯
10589 105B0 # 𐖉 => 𐖰
1058A 105B1 # 𐖊 => 𐖱
1058C 105B3 # 𐖌 => 𐖳
1058D 105B4 # 𐖍 => 𐖴
1058E 105B5 # 𐖎 => 𐖵
1058F 105B6 # 𐖏 => 𐖶
10590 105B7 # 𐖐 => 𐖷
10591 105B8 # 𐖑 => 𐖸
10592 105B9 # 𐖒 => 𐖹
10594 105BB # 𐖔 => 𐖻
10595 105BC # 𐖕 => 𐖼
10781 2D0 # 𐞁 => ː
10782 2D1 # 𐞂 => ˑ
10783 E6 # 𐞃 => æ
10783 301 1FD # 𐞃́ => ǽ
10783 304 1E3 # 𐞃̄ => ǣ
10783 341 1FD # 𐞃́ => ǽ
10784 299 # 𐞄 => ʙ
10785 253 # 𐞅 => ɓ
10787 2A3 # 𐞇 => ʣ
10788 AB66 # 𐞈 => ꭦ
10789 2A5 # 𐞉 => ʥ
1078A 2A4 # 𐞊 => ʤ
1078B 256 # 𐞋 => ɖ
1078C 257 # 𐞌 => ɗ
1078D 1D91 # 𐞍 => ᶑ
1078E 258 # 𐞎 => ɘ
1078F 25E # 𐞏 => ɞ
10790 2A9 # 𐞐 => ʩ
10791 264 # 𐞑 => ɤ
10792 262 # 𐞒 => ɢ
10793 260 # 𐞓 => ɠ
10794 29B # 𐞔 => ʛ
10795 127 # 𐞕 => ħ
10796 29C # 𐞖 => ʜ
10797 267 # 𐞗 => ɧ
10798 284 # 𐞘 => ʄ
10799 2AA # 𐞙 => ʪ
1079A 2AB # 𐞚 => ʫ
1079B 26C # 𐞛 => ɬ
1079C 1DF04 # 𐞜 => 𝼄
1079D A78E # 𐞝 => ꞎ
1079E 26E # 𐞞 => ɮ
1079F 1DF05 # 𐞟 => 𝼅
107A0 28E # 𐞠 => ʎ
107A1 1DF06 # 𐞡 => 𝼆
107A2 F8 # 𐞢 => ø
107A2 301 1FF # 𐞢́ => ǿ
107A2 341 1FF # 𐞢́ => ǿ
107A3 276 # 𐞣 => ɶ
107A4 277 # 𐞤 => ɷ
107A5 71 # 𐞥 => q
107A6 27A # 𐞦 => ɺ
107A7 1DF08 # 𐞧 => 𝼈
107A8 27D # 𐞨 => ɽ
107A9 27E # 𐞩 => ɾ
107AA 280 # 𐞪 => ʀ
107AB 2A8 # 𐞫 => ʨ
107AC 2A6 # 𐞬 => ʦ
107AD AB67 # 𐞭 => ꭧ
107AE 2A7 # 𐞮 => ʧ
107AF 288 # 𐞯 => ʈ
107B0 2C71 # 𐞰 => ⱱ
107B2 28F # 𐞲 => ʏ
107B3 2A1 # 𐞳 => ʡ
107B4 2A2 # 𐞴 => ʢ
107B5 298 # 𐞵 => ʘ
107B6 1C0 # 𐞶 => ǀ
107B7 1C1 # 𐞷 => ǁ
107B8 1C2 # 𐞸 => ǂ
107B9 1DF0A # 𐞹 => 𝼊
107BA 1DF1E # 𐞺 => 𝼞
10C80 10CC0 # 𐲀 => 𐳀
10C81 10CC1 # 𐲁 => 𐳁
10C82 10CC2 # 𐲂 => 𐳂
@ -224804,6 +224917,123 @@ FFEE 25CB # ○ => ○
1D7FD 37 # 𝟽 => 7
1D7FE 38 # 𝟾 => 8
1D7FF 39 # 𝟿 => 9
1E030 430 # 𞀰 => а
1E030 306 4D1 # 𞀰̆ => ӑ
1E030 308 4D3 # 𞀰̈ => ӓ
1E031 431 # 𞀱 => б
1E032 432 # 𞀲 => в
1E033 433 # 𞀳 => г
1E033 301 453 # 𞀳́ => ѓ
1E033 341 453 # 𞀳́ => ѓ
1E034 434 # 𞀴 => д
1E035 435 # 𞀵 => е
1E035 300 450 # 𞀵̀ => ѐ
1E035 306 4D7 # 𞀵̆ => ӗ
1E035 308 451 # 𞀵̈ => ё
1E035 340 450 # 𞀵̀ => ѐ
1E036 436 # 𞀶 => ж
1E036 306 4C2 # 𞀶̆ => ӂ
1E036 308 4DD # 𞀶̈ => ӝ
1E037 437 # 𞀷 => з
1E037 308 4DF # 𞀷̈ => ӟ
1E038 438 # 𞀸 => и
1E038 300 45D # 𞀸̀ => ѝ
1E038 304 4E3 # 𞀸̄ => ӣ
1E038 306 439 # 𞀸̆ => й
1E038 308 4E5 # 𞀸̈ => ӥ
1E038 340 45D # 𞀸̀ => ѝ
1E039 43A # 𞀹 => к
1E039 301 45C # 𞀹́ => ќ
1E039 341 45C # 𞀹́ => ќ
1E03A 43B # 𞀺 => л
1E03B 43C # 𞀻 => м
1E03C 43E # 𞀼 => о
1E03C 308 4E7 # 𞀼̈ => ӧ
1E03D 43F # 𞀽 => п
1E03E 440 # 𞀾 => р
1E03F 441 # 𞀿 => с
1E040 442 # 𞁀 => т
1E041 443 # 𞁁 => у
1E041 304 4EF # 𞁁̄ => ӯ
1E041 306 45E # 𞁁̆ => ў
1E041 308 4F1 # 𞁁̈ => ӱ
1E041 30B 4F3 # 𞁁̋ => ӳ
1E042 444 # 𞁂 => ф
1E043 445 # 𞁃 => х
1E044 446 # 𞁄 => ц
1E045 447 # 𞁅 => ч
1E045 308 4F5 # 𞁅̈ => ӵ
1E046 448 # 𞁆 => ш
1E047 44B # 𞁇 => ы
1E047 308 4F9 # 𞁇̈ => ӹ
1E048 44D # 𞁈 => э
1E048 308 4ED # 𞁈̈ => ӭ
1E049 44E # 𞁉 => ю
1E04A A689 # 𞁊 => ꚉ
1E04B 4D9 # 𞁋 => ә
1E04B 308 4DB # 𞁋̈ => ӛ
1E04C 456 # 𞁌 => і
1E04C 308 457 # 𞁌̈ => ї
1E04D 458 # 𞁍 => ј
1E04E 4E9 # 𞁎 => ө
1E04E 308 4EB # 𞁎̈ => ӫ
1E04F 4AF # 𞁏 => ү
1E050 4CF # 𞁐 => ӏ
1E051 430 # 𞁑 => а
1E051 306 4D1 # 𞁑̆ => ӑ
1E051 308 4D3 # 𞁑̈ => ӓ
1E052 431 # 𞁒 => б
1E053 432 # 𞁓 => в
1E054 433 # 𞁔 => г
1E054 301 453 # 𞁔́ => ѓ
1E054 341 453 # 𞁔́ => ѓ
1E055 434 # 𞁕 => д
1E056 435 # 𞁖 => е
1E056 300 450 # 𞁖̀ => ѐ
1E056 306 4D7 # 𞁖̆ => ӗ
1E056 308 451 # 𞁖̈ => ё
1E056 340 450 # 𞁖̀ => ѐ
1E057 436 # 𞁗 => ж
1E057 306 4C2 # 𞁗̆ => ӂ
1E057 308 4DD # 𞁗̈ => ӝ
1E058 437 # 𞁘 => з
1E058 308 4DF # 𞁘̈ => ӟ
1E059 438 # 𞁙 => и
1E059 300 45D # 𞁙̀ => ѝ
1E059 304 4E3 # 𞁙̄ => ӣ
1E059 306 439 # 𞁙̆ => й
1E059 308 4E5 # 𞁙̈ => ӥ
1E059 340 45D # 𞁙̀ => ѝ
1E05A 43A # 𞁚 => к
1E05A 301 45C # 𞁚́ => ќ
1E05A 341 45C # 𞁚́ => ќ
1E05B 43B # 𞁛 => л
1E05C 43E # 𞁜 => о
1E05C 308 4E7 # 𞁜̈ => ӧ
1E05D 43F # 𞁝 => п
1E05E 441 # 𞁞 => с
1E05F 443 # 𞁟 => у
1E05F 304 4EF # 𞁟̄ => ӯ
1E05F 306 45E # 𞁟̆ => ў
1E05F 308 4F1 # 𞁟̈ => ӱ
1E05F 30B 4F3 # 𞁟̋ => ӳ
1E060 444 # 𞁠 => ф
1E061 445 # 𞁡 => х
1E062 446 # 𞁢 => ц
1E063 447 # 𞁣 => ч
1E063 308 4F5 # 𞁣̈ => ӵ
1E064 448 # 𞁤 => ш
1E065 44A # 𞁥 => ъ
1E066 44B # 𞁦 => ы
1E066 308 4F9 # 𞁦̈ => ӹ
1E067 491 # 𞁧 => ґ
1E068 456 # 𞁨 => і
1E068 308 457 # 𞁨̈ => ї
1E069 455 # 𞁩 => ѕ
1E06A 45F # 𞁪 => џ
1E06B 4AB # 𞁫 => ҫ
1E06C A651 # 𞁬 => ꙑ
1E06D 4B1 # 𞁭 => ұ
1E900 1E922 # 𞤀 => 𞤢
1E901 1E923 # 𞤁 => 𞤣
1E902 1E924 # 𞤂 => 𞤤

Can't render this file because it is too large.

View File

@ -2337,6 +2337,9 @@ FB9 F90 FB5 # ྐྵ => ྐྵ
A69C 44A # ꚜ => ъ
A69D 44C # ꚝ => ь
A770 A76F # ꝰ => ꝯ
A7F2 43 # ꟲ => C
A7F3 46 # ꟳ => F
A7F4 51 # ꟴ => Q
A7F8 126 # ꟸ => Ħ
A7F9 153 # ꟹ => œ
AB5C A727 # ꭜ => ꜧ
@ -15055,6 +15058,62 @@ FFEB 2192 # → => →
FFEC 2193 # ↓ => ↓
FFED 25A0 # ■ => ■
FFEE 25CB # ○ => ○
10781 2D0 # 𐞁 => ː
10782 2D1 # 𐞂 => ˑ
10783 E6 # 𐞃 => æ
10784 299 # 𐞄 => ʙ
10785 253 # 𐞅 => ɓ
10787 2A3 # 𐞇 => ʣ
10788 AB66 # 𐞈 => ꭦ
10789 2A5 # 𐞉 => ʥ
1078A 2A4 # 𐞊 => ʤ
1078B 256 # 𐞋 => ɖ
1078C 257 # 𐞌 => ɗ
1078D 1D91 # 𐞍 => ᶑ
1078E 258 # 𐞎 => ɘ
1078F 25E # 𐞏 => ɞ
10790 2A9 # 𐞐 => ʩ
10791 264 # 𐞑 => ɤ
10792 262 # 𐞒 => ɢ
10793 260 # 𐞓 => ɠ
10794 29B # 𐞔 => ʛ
10795 127 # 𐞕 => ħ
10796 29C # 𐞖 => ʜ
10797 267 # 𐞗 => ɧ
10798 284 # 𐞘 => ʄ
10799 2AA # 𐞙 => ʪ
1079A 2AB # 𐞚 => ʫ
1079B 26C # 𐞛 => ɬ
1079C 1DF04 # 𐞜 => 𝼄
1079D A78E # 𐞝 => ꞎ
1079E 26E # 𐞞 => ɮ
1079F 1DF05 # 𐞟 => 𝼅
107A0 28E # 𐞠 => ʎ
107A1 1DF06 # 𐞡 => 𝼆
107A2 F8 # 𐞢 => ø
107A3 276 # 𐞣 => ɶ
107A4 277 # 𐞤 => ɷ
107A5 71 # 𐞥 => q
107A6 27A # 𐞦 => ɺ
107A7 1DF08 # 𐞧 => 𝼈
107A8 27D # 𐞨 => ɽ
107A9 27E # 𐞩 => ɾ
107AA 280 # 𐞪 => ʀ
107AB 2A8 # 𐞫 => ʨ
107AC 2A6 # 𐞬 => ʦ
107AD AB67 # 𐞭 => ꭧ
107AE 2A7 # 𐞮 => ʧ
107AF 288 # 𐞯 => ʈ
107B0 2C71 # 𐞰 => ⱱ
107B2 28F # 𐞲 => ʏ
107B3 2A1 # 𐞳 => ʡ
107B4 2A2 # 𐞴 => ʢ
107B5 298 # 𐞵 => ʘ
107B6 1C0 # 𐞶 => ǀ
107B7 1C1 # 𐞷 => ǁ
107B8 1C2 # 𐞸 => ǂ
107B9 1DF0A # 𐞹 => 𝼊
107BA 1DF1E # 𐞺 => 𝼞
1109A 11099 110BA # 𑂚 => 𑂚
1109C 1109B 110BA # 𑂜 => 𑂜
110AB 110A5 110BA # 𑂫 => 𑂫
@ -16077,6 +16136,68 @@ FFEE 25CB # ○ => ○
1D7FD 37 # 𝟽 => 7
1D7FE 38 # 𝟾 => 8
1D7FF 39 # 𝟿 => 9
1E030 430 # 𞀰 => а
1E031 431 # 𞀱 => б
1E032 432 # 𞀲 => в
1E033 433 # 𞀳 => г
1E034 434 # 𞀴 => д
1E035 435 # 𞀵 => е
1E036 436 # 𞀶 => ж
1E037 437 # 𞀷 => з
1E038 438 # 𞀸 => и
1E039 43A # 𞀹 => к
1E03A 43B # 𞀺 => л
1E03B 43C # 𞀻 => м
1E03C 43E # 𞀼 => о
1E03D 43F # 𞀽 => п
1E03E 440 # 𞀾 => р
1E03F 441 # 𞀿 => с
1E040 442 # 𞁀 => т
1E041 443 # 𞁁 => у
1E042 444 # 𞁂 => ф
1E043 445 # 𞁃 => х
1E044 446 # 𞁄 => ц
1E045 447 # 𞁅 => ч
1E046 448 # 𞁆 => ш
1E047 44B # 𞁇 => ы
1E048 44D # 𞁈 => э
1E049 44E # 𞁉 => ю
1E04A A689 # 𞁊 => ꚉ
1E04B 4D9 # 𞁋 => ә
1E04C 456 # 𞁌 => і
1E04D 458 # 𞁍 => ј
1E04E 4E9 # 𞁎 => ө
1E04F 4AF # 𞁏 => ү
1E050 4CF # 𞁐 => ӏ
1E051 430 # 𞁑 => а
1E052 431 # 𞁒 => б
1E053 432 # 𞁓 => в
1E054 433 # 𞁔 => г
1E055 434 # 𞁕 => д
1E056 435 # 𞁖 => е
1E057 436 # 𞁗 => ж
1E058 437 # 𞁘 => з
1E059 438 # 𞁙 => и
1E05A 43A # 𞁚 => к
1E05B 43B # 𞁛 => л
1E05C 43E # 𞁜 => о
1E05D 43F # 𞁝 => п
1E05E 441 # 𞁞 => с
1E05F 443 # 𞁟 => у
1E060 444 # 𞁠 => ф
1E061 445 # 𞁡 => х
1E062 446 # 𞁢 => ц
1E063 447 # 𞁣 => ч
1E064 448 # 𞁤 => ш
1E065 44A # 𞁥 => ъ
1E066 44B # 𞁦 => ы
1E067 491 # 𞁧 => ґ
1E068 456 # 𞁨 => і
1E069 455 # 𞁩 => ѕ
1E06A 45F # 𞁪 => џ
1E06B 4AB # 𞁫 => ҫ
1E06C A651 # 𞁬 => ꙑ
1E06D 4B1 # 𞁭 => ұ
1EE00 627 # 𞸀 => ا
1EE01 628 # 𞸁 => ب
1EE02 62C # 𞸂 => ج

Can't render this file because it is too large.

18013
data/nfkd_cf.tsv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -155571,6 +155571,18 @@ FB9 F90 FB5 # ྐྵ => ྐྵ
A69C 44A # ꚜ => ъ
A69D 44C # ꚝ => ь
A770 A76F # ꝰ => ꝯ
A7F2 43 # ꟲ => C
A7F2 301 106 # ꟲ́ => Ć
A7F2 302 108 # ꟲ̂ => Ĉ
A7F2 307 10A # ꟲ̇ => Ċ
A7F2 30C 10C # ꟲ̌ => Č
A7F2 327 C7 # ꟲ̧ => Ç
A7F2 327 301 1E08 # ꟲ̧́ => Ḉ
A7F2 327 341 1E08 # ꟲ̧́ => Ḉ
A7F2 341 106 # ꟲ́ => Ć
A7F3 46 # ꟳ => F
A7F3 307 1E1E # ꟳ̇ => Ḟ
A7F4 51 # ꟴ => Q
A7F8 126 # ꟸ => Ħ
A7F9 153 # ꟹ => œ
AB5C A727 # ꭜ => ꜧ
@ -212848,6 +212860,67 @@ FFEC 2193 # ↓ => ↓
FFED 25A0 # ■ => ■
FFEE 25CB # ○ => ○
FFFD 20 # <20> =>
10781 2D0 # 𐞁 => ː
10782 2D1 # 𐞂 => ˑ
10783 E6 # 𐞃 => æ
10783 301 1FD # 𐞃́ => ǽ
10783 304 1E3 # 𐞃̄ => ǣ
10783 341 1FD # 𐞃́ => ǽ
10784 299 # 𐞄 => ʙ
10785 253 # 𐞅 => ɓ
10787 2A3 # 𐞇 => ʣ
10788 AB66 # 𐞈 => ꭦ
10789 2A5 # 𐞉 => ʥ
1078A 2A4 # 𐞊 => ʤ
1078B 256 # 𐞋 => ɖ
1078C 257 # 𐞌 => ɗ
1078D 1D91 # 𐞍 => ᶑ
1078E 258 # 𐞎 => ɘ
1078F 25E # 𐞏 => ɞ
10790 2A9 # 𐞐 => ʩ
10791 264 # 𐞑 => ɤ
10792 262 # 𐞒 => ɢ
10793 260 # 𐞓 => ɠ
10794 29B # 𐞔 => ʛ
10795 127 # 𐞕 => ħ
10796 29C # 𐞖 => ʜ
10797 267 # 𐞗 => ɧ
10798 284 # 𐞘 => ʄ
10799 2AA # 𐞙 => ʪ
1079A 2AB # 𐞚 => ʫ
1079B 26C # 𐞛 => ɬ
1079C 1DF04 # 𐞜 => 𝼄
1079D A78E # 𐞝 => ꞎ
1079E 26E # 𐞞 => ɮ
1079F 1DF05 # 𐞟 => 𝼅
107A0 28E # 𐞠 => ʎ
107A1 1DF06 # 𐞡 => 𝼆
107A2 F8 # 𐞢 => ø
107A2 301 1FF # 𐞢́ => ǿ
107A2 341 1FF # 𐞢́ => ǿ
107A3 276 # 𐞣 => ɶ
107A4 277 # 𐞤 => ɷ
107A5 71 # 𐞥 => q
107A6 27A # 𐞦 => ɺ
107A7 1DF08 # 𐞧 => 𝼈
107A8 27D # 𐞨 => ɽ
107A9 27E # 𐞩 => ɾ
107AA 280 # 𐞪 => ʀ
107AB 2A8 # 𐞫 => ʨ
107AC 2A6 # 𐞬 => ʦ
107AD AB67 # 𐞭 => ꭧ
107AE 2A7 # 𐞮 => ʧ
107AF 288 # 𐞯 => ʈ
107B0 2C71 # 𐞰 => ⱱ
107B2 28F # 𐞲 => ʏ
107B3 2A1 # 𐞳 => ʡ
107B4 2A2 # 𐞴 => ʢ
107B5 298 # 𐞵 => ʘ
107B6 1C0 # 𐞶 => ǀ
107B7 1C1 # 𐞷 => ǁ
107B8 1C2 # 𐞸 => ǂ
107B9 1DF0A # 𐞹 => 𝼊
107BA 1DF1E # 𐞺 => 𝼞
11099 110BA 1109A # 𑂚 => 𑂚
1109B 110BA 1109C # 𑂜 => 𑂜
110A5 110BA 110AB # 𑂫 => 𑂫
@ -223568,6 +223641,123 @@ FFFD 20 # <20> =>
1D7FD 37 # 𝟽 => 7
1D7FE 38 # 𝟾 => 8
1D7FF 39 # 𝟿 => 9
1E030 430 # 𞀰 => а
1E030 306 4D1 # 𞀰̆ => ӑ
1E030 308 4D3 # 𞀰̈ => ӓ
1E031 431 # 𞀱 => б
1E032 432 # 𞀲 => в
1E033 433 # 𞀳 => г
1E033 301 453 # 𞀳́ => ѓ
1E033 341 453 # 𞀳́ => ѓ
1E034 434 # 𞀴 => д
1E035 435 # 𞀵 => е
1E035 300 450 # 𞀵̀ => ѐ
1E035 306 4D7 # 𞀵̆ => ӗ
1E035 308 451 # 𞀵̈ => ё
1E035 340 450 # 𞀵̀ => ѐ
1E036 436 # 𞀶 => ж
1E036 306 4C2 # 𞀶̆ => ӂ
1E036 308 4DD # 𞀶̈ => ӝ
1E037 437 # 𞀷 => з
1E037 308 4DF # 𞀷̈ => ӟ
1E038 438 # 𞀸 => и
1E038 300 45D # 𞀸̀ => ѝ
1E038 304 4E3 # 𞀸̄ => ӣ
1E038 306 439 # 𞀸̆ => й
1E038 308 4E5 # 𞀸̈ => ӥ
1E038 340 45D # 𞀸̀ => ѝ
1E039 43A # 𞀹 => к
1E039 301 45C # 𞀹́ => ќ
1E039 341 45C # 𞀹́ => ќ
1E03A 43B # 𞀺 => л
1E03B 43C # 𞀻 => м
1E03C 43E # 𞀼 => о
1E03C 308 4E7 # 𞀼̈ => ӧ
1E03D 43F # 𞀽 => п
1E03E 440 # 𞀾 => р
1E03F 441 # 𞀿 => с
1E040 442 # 𞁀 => т
1E041 443 # 𞁁 => у
1E041 304 4EF # 𞁁̄ => ӯ
1E041 306 45E # 𞁁̆ => ў
1E041 308 4F1 # 𞁁̈ => ӱ
1E041 30B 4F3 # 𞁁̋ => ӳ
1E042 444 # 𞁂 => ф
1E043 445 # 𞁃 => х
1E044 446 # 𞁄 => ц
1E045 447 # 𞁅 => ч
1E045 308 4F5 # 𞁅̈ => ӵ
1E046 448 # 𞁆 => ш
1E047 44B # 𞁇 => ы
1E047 308 4F9 # 𞁇̈ => ӹ
1E048 44D # 𞁈 => э
1E048 308 4ED # 𞁈̈ => ӭ
1E049 44E # 𞁉 => ю
1E04A A689 # 𞁊 => ꚉ
1E04B 4D9 # 𞁋 => ә
1E04B 308 4DB # 𞁋̈ => ӛ
1E04C 456 # 𞁌 => і
1E04C 308 457 # 𞁌̈ => ї
1E04D 458 # 𞁍 => ј
1E04E 4E9 # 𞁎 => ө
1E04E 308 4EB # 𞁎̈ => ӫ
1E04F 4AF # 𞁏 => ү
1E050 4CF # 𞁐 => ӏ
1E051 430 # 𞁑 => а
1E051 306 4D1 # 𞁑̆ => ӑ
1E051 308 4D3 # 𞁑̈ => ӓ
1E052 431 # 𞁒 => б
1E053 432 # 𞁓 => в
1E054 433 # 𞁔 => г
1E054 301 453 # 𞁔́ => ѓ
1E054 341 453 # 𞁔́ => ѓ
1E055 434 # 𞁕 => д
1E056 435 # 𞁖 => е
1E056 300 450 # 𞁖̀ => ѐ
1E056 306 4D7 # 𞁖̆ => ӗ
1E056 308 451 # 𞁖̈ => ё
1E056 340 450 # 𞁖̀ => ѐ
1E057 436 # 𞁗 => ж
1E057 306 4C2 # 𞁗̆ => ӂ
1E057 308 4DD # 𞁗̈ => ӝ
1E058 437 # 𞁘 => з
1E058 308 4DF # 𞁘̈ => ӟ
1E059 438 # 𞁙 => и
1E059 300 45D # 𞁙̀ => ѝ
1E059 304 4E3 # 𞁙̄ => ӣ
1E059 306 439 # 𞁙̆ => й
1E059 308 4E5 # 𞁙̈ => ӥ
1E059 340 45D # 𞁙̀ => ѝ
1E05A 43A # 𞁚 => к
1E05A 301 45C # 𞁚́ => ќ
1E05A 341 45C # 𞁚́ => ќ
1E05B 43B # 𞁛 => л
1E05C 43E # 𞁜 => о
1E05C 308 4E7 # 𞁜̈ => ӧ
1E05D 43F # 𞁝 => п
1E05E 441 # 𞁞 => с
1E05F 443 # 𞁟 => у
1E05F 304 4EF # 𞁟̄ => ӯ
1E05F 306 45E # 𞁟̆ => ў
1E05F 308 4F1 # 𞁟̈ => ӱ
1E05F 30B 4F3 # 𞁟̋ => ӳ
1E060 444 # 𞁠 => ф
1E061 445 # 𞁡 => х
1E062 446 # 𞁢 => ц
1E063 447 # 𞁣 => ч
1E063 308 4F5 # 𞁣̈ => ӵ
1E064 448 # 𞁤 => ш
1E065 44A # 𞁥 => ъ
1E066 44B # 𞁦 => ы
1E066 308 4F9 # 𞁦̈ => ӹ
1E067 491 # 𞁧 => ґ
1E068 456 # 𞁨 => і
1E068 308 457 # 𞁨̈ => ї
1E069 455 # 𞁩 => ѕ
1E06A 45F # 𞁪 => џ
1E06B 4AB # 𞁫 => ҫ
1E06C A651 # 𞁬 => ꙑ
1E06D 4B1 # 𞁭 => ұ
1EE00 627 # 𞸀 => ا
1EE00 653 622 # 𞸀ٓ => آ
1EE00 654 623 # 𞸀ٔ => أ

Can't render this file because it is too large.

View File

@ -59737,6 +59737,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ
2C2C 2C5C # Ⱜ => ⱜ
2C2D 2C5D # Ⱝ => ⱝ
2C2E 2C5E # Ⱞ => ⱞ
2C2F 2C5F # Ⱟ => ⱟ
2C60 2C61 # Ⱡ => ⱡ
2C62 26B # Ɫ => ɫ
2C63 1D7D # Ᵽ => ᵽ
@ -156572,12 +156573,28 @@ A7B8 A7B9 # Ꞹ => ꞹ
A7BA A7BB # Ꞻ => ꞻ
A7BC A7BD # Ꞽ => ꞽ
A7BE A7BF # Ꞿ => ꞿ
A7C0 A7C1 # Ꟁ => ꟁ
A7C2 A7C3 # Ꟃ => ꟃ
A7C4 A794 # Ꞔ => ꞔ
A7C5 282 # Ʂ => ʂ
A7C6 1D8E # Ᶎ => ᶎ
A7C7 A7C8 # Ꟈ => ꟈ
A7C9 A7CA # Ꟊ => ꟊ
A7D0 A7D1 # Ꟑ => ꟑ
A7D6 A7D7 # Ꟗ => ꟗ
A7D8 A7D9 # Ꟙ => ꟙ
A7F2 63 # ꟲ => c
A7F2 301 107 # ꟲ́ => ć
A7F2 302 109 # ꟲ̂ => ĉ
A7F2 307 10B # ꟲ̇ => ċ
A7F2 30C 10D # ꟲ̌ => č
A7F2 327 E7 # ꟲ̧ => ç
A7F2 327 301 1E09 # ꟲ̧́ => ḉ
A7F2 327 341 1E09 # ꟲ̧́ => ḉ
A7F2 341 107 # ꟲ́ => ć
A7F3 66 # ꟳ => f
A7F3 307 1E1F # ꟳ̇ => ḟ
A7F4 71 # ꟴ => q
A7F5 A7F6 # Ꟶ => ꟶ
A7F8 127 # ꟸ => ħ
A7F9 153 # ꟹ => œ
@ -214012,6 +214029,102 @@ FFFD 20 # <20> =>
104D1 104F9 # 𐓑 => 𐓹
104D2 104FA # 𐓒 => 𐓺
104D3 104FB # 𐓓 => 𐓻
10570 10597 # 𐕰 => 𐖗
10571 10598 # 𐕱 => 𐖘
10572 10599 # 𐕲 => 𐖙
10573 1059A # 𐕳 => 𐖚
10574 1059B # 𐕴 => 𐖛
10575 1059C # 𐕵 => 𐖜
10576 1059D # 𐕶 => 𐖝
10577 1059E # 𐕷 => 𐖞
10578 1059F # 𐕸 => 𐖟
10579 105A0 # 𐕹 => 𐖠
1057A 105A1 # 𐕺 => 𐖡
1057C 105A3 # 𐕼 => 𐖣
1057D 105A4 # 𐕽 => 𐖤
1057E 105A5 # 𐕾 => 𐖥
1057F 105A6 # 𐕿 => 𐖦
10580 105A7 # 𐖀 => 𐖧
10581 105A8 # 𐖁 => 𐖨
10582 105A9 # 𐖂 => 𐖩
10583 105AA # 𐖃 => 𐖪
10584 105AB # 𐖄 => 𐖫
10585 105AC # 𐖅 => 𐖬
10586 105AD # 𐖆 => 𐖭
10587 105AE # 𐖇 => 𐖮
10588 105AF # 𐖈 => 𐖯
10589 105B0 # 𐖉 => 𐖰
1058A 105B1 # 𐖊 => 𐖱
1058C 105B3 # 𐖌 => 𐖳
1058D 105B4 # 𐖍 => 𐖴
1058E 105B5 # 𐖎 => 𐖵
1058F 105B6 # 𐖏 => 𐖶
10590 105B7 # 𐖐 => 𐖷
10591 105B8 # 𐖑 => 𐖸
10592 105B9 # 𐖒 => 𐖹
10594 105BB # 𐖔 => 𐖻
10595 105BC # 𐖕 => 𐖼
10781 2D0 # 𐞁 => ː
10782 2D1 # 𐞂 => ˑ
10783 E6 # 𐞃 => æ
10783 301 1FD # 𐞃́ => ǽ
10783 304 1E3 # 𐞃̄ => ǣ
10783 341 1FD # 𐞃́ => ǽ
10784 299 # 𐞄 => ʙ
10785 253 # 𐞅 => ɓ
10787 2A3 # 𐞇 => ʣ
10788 AB66 # 𐞈 => ꭦ
10789 2A5 # 𐞉 => ʥ
1078A 2A4 # 𐞊 => ʤ
1078B 256 # 𐞋 => ɖ
1078C 257 # 𐞌 => ɗ
1078D 1D91 # 𐞍 => ᶑ
1078E 258 # 𐞎 => ɘ
1078F 25E # 𐞏 => ɞ
10790 2A9 # 𐞐 => ʩ
10791 264 # 𐞑 => ɤ
10792 262 # 𐞒 => ɢ
10793 260 # 𐞓 => ɠ
10794 29B # 𐞔 => ʛ
10795 127 # 𐞕 => ħ
10796 29C # 𐞖 => ʜ
10797 267 # 𐞗 => ɧ
10798 284 # 𐞘 => ʄ
10799 2AA # 𐞙 => ʪ
1079A 2AB # 𐞚 => ʫ
1079B 26C # 𐞛 => ɬ
1079C 1DF04 # 𐞜 => 𝼄
1079D A78E # 𐞝 => ꞎ
1079E 26E # 𐞞 => ɮ
1079F 1DF05 # 𐞟 => 𝼅
107A0 28E # 𐞠 => ʎ
107A1 1DF06 # 𐞡 => 𝼆
107A2 F8 # 𐞢 => ø
107A2 301 1FF # 𐞢́ => ǿ
107A2 341 1FF # 𐞢́ => ǿ
107A3 276 # 𐞣 => ɶ
107A4 277 # 𐞤 => ɷ
107A5 71 # 𐞥 => q
107A6 27A # 𐞦 => ɺ
107A7 1DF08 # 𐞧 => 𝼈
107A8 27D # 𐞨 => ɽ
107A9 27E # 𐞩 => ɾ
107AA 280 # 𐞪 => ʀ
107AB 2A8 # 𐞫 => ʨ
107AC 2A6 # 𐞬 => ʦ
107AD AB67 # 𐞭 => ꭧ
107AE 2A7 # 𐞮 => ʧ
107AF 288 # 𐞯 => ʈ
107B0 2C71 # 𐞰 => ⱱ
107B2 28F # 𐞲 => ʏ
107B3 2A1 # 𐞳 => ʡ
107B4 2A2 # 𐞴 => ʢ
107B5 298 # 𐞵 => ʘ
107B6 1C0 # 𐞶 => ǀ
107B7 1C1 # 𐞷 => ǁ
107B8 1C2 # 𐞸 => ǂ
107B9 1DF0A # 𐞹 => 𝼊
107BA 1DF1E # 𐞺 => 𝼞
10C80 10CC0 # 𐲀 => 𐳀
10C81 10CC1 # 𐲁 => 𐳁
10C82 10CC2 # 𐲂 => 𐳂
@ -224847,6 +224960,123 @@ FFFD 20 # <20> =>
1D7FD 37 # 𝟽 => 7
1D7FE 38 # 𝟾 => 8
1D7FF 39 # 𝟿 => 9
1E030 430 # 𞀰 => а
1E030 306 4D1 # 𞀰̆ => ӑ
1E030 308 4D3 # 𞀰̈ => ӓ
1E031 431 # 𞀱 => б
1E032 432 # 𞀲 => в
1E033 433 # 𞀳 => г
1E033 301 453 # 𞀳́ => ѓ
1E033 341 453 # 𞀳́ => ѓ
1E034 434 # 𞀴 => д
1E035 435 # 𞀵 => е
1E035 300 450 # 𞀵̀ => ѐ
1E035 306 4D7 # 𞀵̆ => ӗ
1E035 308 451 # 𞀵̈ => ё
1E035 340 450 # 𞀵̀ => ѐ
1E036 436 # 𞀶 => ж
1E036 306 4C2 # 𞀶̆ => ӂ
1E036 308 4DD # 𞀶̈ => ӝ
1E037 437 # 𞀷 => з
1E037 308 4DF # 𞀷̈ => ӟ
1E038 438 # 𞀸 => и
1E038 300 45D # 𞀸̀ => ѝ
1E038 304 4E3 # 𞀸̄ => ӣ
1E038 306 439 # 𞀸̆ => й
1E038 308 4E5 # 𞀸̈ => ӥ
1E038 340 45D # 𞀸̀ => ѝ
1E039 43A # 𞀹 => к
1E039 301 45C # 𞀹́ => ќ
1E039 341 45C # 𞀹́ => ќ
1E03A 43B # 𞀺 => л
1E03B 43C # 𞀻 => м
1E03C 43E # 𞀼 => о
1E03C 308 4E7 # 𞀼̈ => ӧ
1E03D 43F # 𞀽 => п
1E03E 440 # 𞀾 => р
1E03F 441 # 𞀿 => с
1E040 442 # 𞁀 => т
1E041 443 # 𞁁 => у
1E041 304 4EF # 𞁁̄ => ӯ
1E041 306 45E # 𞁁̆ => ў
1E041 308 4F1 # 𞁁̈ => ӱ
1E041 30B 4F3 # 𞁁̋ => ӳ
1E042 444 # 𞁂 => ф
1E043 445 # 𞁃 => х
1E044 446 # 𞁄 => ц
1E045 447 # 𞁅 => ч
1E045 308 4F5 # 𞁅̈ => ӵ
1E046 448 # 𞁆 => ш
1E047 44B # 𞁇 => ы
1E047 308 4F9 # 𞁇̈ => ӹ
1E048 44D # 𞁈 => э
1E048 308 4ED # 𞁈̈ => ӭ
1E049 44E # 𞁉 => ю
1E04A A689 # 𞁊 => ꚉ
1E04B 4D9 # 𞁋 => ә
1E04B 308 4DB # 𞁋̈ => ӛ
1E04C 456 # 𞁌 => і
1E04C 308 457 # 𞁌̈ => ї
1E04D 458 # 𞁍 => ј
1E04E 4E9 # 𞁎 => ө
1E04E 308 4EB # 𞁎̈ => ӫ
1E04F 4AF # 𞁏 => ү
1E050 4CF # 𞁐 => ӏ
1E051 430 # 𞁑 => а
1E051 306 4D1 # 𞁑̆ => ӑ
1E051 308 4D3 # 𞁑̈ => ӓ
1E052 431 # 𞁒 => б
1E053 432 # 𞁓 => в
1E054 433 # 𞁔 => г
1E054 301 453 # 𞁔́ => ѓ
1E054 341 453 # 𞁔́ => ѓ
1E055 434 # 𞁕 => д
1E056 435 # 𞁖 => е
1E056 300 450 # 𞁖̀ => ѐ
1E056 306 4D7 # 𞁖̆ => ӗ
1E056 308 451 # 𞁖̈ => ё
1E056 340 450 # 𞁖̀ => ѐ
1E057 436 # 𞁗 => ж
1E057 306 4C2 # 𞁗̆ => ӂ
1E057 308 4DD # 𞁗̈ => ӝ
1E058 437 # 𞁘 => з
1E058 308 4DF # 𞁘̈ => ӟ
1E059 438 # 𞁙 => и
1E059 300 45D # 𞁙̀ => ѝ
1E059 304 4E3 # 𞁙̄ => ӣ
1E059 306 439 # 𞁙̆ => й
1E059 308 4E5 # 𞁙̈ => ӥ
1E059 340 45D # 𞁙̀ => ѝ
1E05A 43A # 𞁚 => к
1E05A 301 45C # 𞁚́ => ќ
1E05A 341 45C # 𞁚́ => ќ
1E05B 43B # 𞁛 => л
1E05C 43E # 𞁜 => о
1E05C 308 4E7 # 𞁜̈ => ӧ
1E05D 43F # 𞁝 => п
1E05E 441 # 𞁞 => с
1E05F 443 # 𞁟 => у
1E05F 304 4EF # 𞁟̄ => ӯ
1E05F 306 45E # 𞁟̆ => ў
1E05F 308 4F1 # 𞁟̈ => ӱ
1E05F 30B 4F3 # 𞁟̋ => ӳ
1E060 444 # 𞁠 => ф
1E061 445 # 𞁡 => х
1E062 446 # 𞁢 => ц
1E063 447 # 𞁣 => ч
1E063 308 4F5 # 𞁣̈ => ӵ
1E064 448 # 𞁤 => ш
1E065 44A # 𞁥 => ъ
1E066 44B # 𞁦 => ы
1E066 308 4F9 # 𞁦̈ => ӹ
1E067 491 # 𞁧 => ґ
1E068 456 # 𞁨 => і
1E068 308 457 # 𞁨̈ => ї
1E069 455 # 𞁩 => ѕ
1E06A 45F # 𞁪 => џ
1E06B 4AB # 𞁫 => ҫ
1E06C A651 # 𞁬 => ꙑ
1E06D 4B1 # 𞁭 => ұ
1E900 1E922 # 𞤀 => 𞤢
1E901 1E923 # 𞤁 => 𞤣
1E902 1E924 # 𞤂 => 𞤤

Can't render this file because it is too large.

View File

@ -50,14 +50,13 @@ static constexpr char kDefaultNormalizerName[] = "nfkc";
#ifndef ENABLE_NFKC_COMPILE
static constexpr char kCompileError[] =
"NFK compile is not enabled. rebuild with ./configure "
"--enable-nfkc-compile";
"NFK compile is not enabled. rebuild with -DSPM_ENABLE_NFKC_COMPILE=ON"
#endif
#ifdef ENABLE_NFKC_COMPILE
// Normalize `input` with ICU's normalizer with `mode`.
Builder::Chars UnicodeNormalize(UNormalizationMode mode,
const Builder::Chars &input) {
// Normalize `input` with ICU's normalizer with `mode`.
Builder::Chars
UnicodeNormalize(UNormalizationMode mode, const Builder::Chars &input) {
const std::string utf8 = string_util::UnicodeTextToUTF8(input);
CHECK(!utf8.empty());
@ -293,11 +292,12 @@ util::Status Builder::GetPrecompiledCharsMap(absl::string_view name,
<< "No precompiled charsmap is found: " << name;
}
// static
util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
namespace {
util::Status BuildMapInternal(
Builder::CharsMap *chars_map,
std::function<Builder::Chars(const Builder::Chars &)> composer,
std::function<Builder::Chars(const Builder::Chars &)> decomposer) {
#ifdef ENABLE_NFKC_COMPILE
LOG(INFO) << "Running BuildNFKCMap";
// Set of fully NFKD decomposed characters.
std::set<Builder::Chars> nfkd_decomposed;
@ -312,11 +312,11 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
continue;
}
// Aggregates single character to fully NFKC normalized characters.
const auto nfkc = ToNFKC({cp});
const auto nfkc = composer({cp});
if (nfkc.size() >= 2 || (nfkc.size() == 1 && nfkc[0] != cp)) {
nfkc_map[{cp}] = nfkc;
}
const auto nfkd = ToNFKD({cp});
const auto nfkd = decomposer({cp});
if (nfkd.size() == 1) {
// Aggregates reverse mapping from normalized to unnormalized character.
norm2orig[nfkd[0]].insert(cp);
@ -327,7 +327,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
}
for (const auto &nfkd : nfkd_decomposed) {
const auto nfkc = ToNFC(nfkd);
const auto nfkc = composer(nfkd);
// This case is already covered by single-character to NFKC mapping.
if (nfkc == nfkd) {
continue;
@ -341,9 +341,18 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
}
}
RETURN_IF_ERROR(RemoveRedundantMap(&nfkc_map));
RETURN_IF_ERROR(Builder::RemoveRedundantMap(&nfkc_map));
*chars_map = std::move(nfkc_map);
#endif // ENABLE_NFKC_COMPILE
return util::OkStatus();
}
} // namespace
// static
util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
LOG(INFO) << "Running BuildNFKCMap";
BuildMapInternal(chars_map, ToNFKC, ToNFKD);
#else
LOG(ERROR) << kCompileError;
#endif
@ -351,72 +360,27 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
return util::OkStatus();
}
// Builds the NFC normalization map into `chars_map`.
//
// With ENABLE_NFKC_COMPILE defined, this delegates to BuildMapInternal using
// ToNFC as the composer and ToNFD as the decomposer, and forwards any error
// status it reports. Without it, only kCompileError is logged and `chars_map`
// is left untouched.
// static
util::Status Builder::BuildNFCMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
  LOG(INFO) << "Running BuildNFCMap";
  // BuildMapInternal can fail (e.g. via RemoveRedundantMap); do not swallow
  // its status and report success for a map that was never written.
  RETURN_IF_ERROR(BuildMapInternal(chars_map, ToNFC, ToNFD));
#else
  LOG(ERROR) << kCompileError;
#endif
  return util::OkStatus();
}
util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
LOG(INFO) << "Running BuildNmtNFKCMap";
CharsMap nfkc_map;
RETURN_IF_ERROR(Builder::BuildNFKCMap(&nfkc_map));
// Other code points considered as whitespace.
nfkc_map[{0x0009}] = {0x20}; // TAB
nfkc_map[{0x000A}] = {0x20}; // LINE FEED
nfkc_map[{0x000C}] = {0x20}; // FORM FEED
nfkc_map[{0x000D}] = {0x20}; // CARRIAGE RETURN
nfkc_map[{0x1680}] = {0x20}; // OGHAM SPACE MARK
nfkc_map[{0x200B}] = {0x20}; // ZERO WIDTH SPACE
nfkc_map[{0x200E}] = {0x20}; // LEFT-TO-RIGHT MARK
nfkc_map[{0x200F}] = {0x20}; // RIGHT-TO-LEFT MARK
nfkc_map[{0x2028}] = {0x20}; // LINE SEPARATOR
nfkc_map[{0x2029}] = {0x20}; // PARAGRAPH SEPARATOR
nfkc_map[{0x2581}] = {0x20}; // LOWER ONE EIGHT BLOCK
nfkc_map[{0xFEFF}] = {0x20}; // ZERO WIDTH NO-BREAK
nfkc_map[{0xFFFD}] = {0x20}; // REPLACEMENT CHARACTER
nfkc_map[{0x200C}] = {0x20}; // ZERO WIDTH NON-JOINER
// nfkc_map[{0x200D}] = {0x20}; // ZERO WIDTH JOINER
// Ascii Control characters
nfkc_map[{0x0001}] = {};
nfkc_map[{0x0002}] = {};
nfkc_map[{0x0003}] = {};
nfkc_map[{0x0004}] = {};
nfkc_map[{0x0005}] = {};
nfkc_map[{0x0006}] = {};
nfkc_map[{0x0007}] = {};
nfkc_map[{0x0008}] = {};
nfkc_map[{0x000B}] = {};
nfkc_map[{0x000E}] = {};
nfkc_map[{0x000F}] = {};
nfkc_map[{0x0010}] = {};
nfkc_map[{0x0011}] = {};
nfkc_map[{0x0012}] = {};
nfkc_map[{0x0013}] = {};
nfkc_map[{0x0014}] = {};
nfkc_map[{0x0015}] = {};
nfkc_map[{0x0016}] = {};
nfkc_map[{0x0017}] = {};
nfkc_map[{0x0018}] = {};
nfkc_map[{0x0019}] = {};
nfkc_map[{0x001A}] = {};
nfkc_map[{0x001B}] = {};
nfkc_map[{0x001C}] = {};
nfkc_map[{0x001D}] = {};
nfkc_map[{0x001E}] = {};
nfkc_map[{0x001F}] = {};
// <control-007F>..<control-009F>
nfkc_map[{0x007F}] = {};
nfkc_map[{0x008F}] = {};
nfkc_map[{0x009F}] = {};
// Do not normalize FULL_WIDTH TILDE, since FULL_WIDTH TILDE
// and HALF_WIDTH TILDE are used differently in Japanese.
nfkc_map.erase({0xFF5E});
RETURN_IF_ERROR(BuildNFKCMap(&nfkc_map));
RETURN_IF_ERROR(MergeNmtMap(&nfkc_map));
RETURN_IF_ERROR(RemoveRedundantMap(&nfkc_map));
*chars_map = std::move(nfkc_map);
#else
LOG(ERROR) << kCompileError;
#endif
@ -449,6 +413,66 @@ util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
return util::OkStatus();
}
// Overlays the NMT-specific rules onto `chars_map`:
//   * a fixed list of whitespace-like code points is rewritten to U+0020,
//   * ASCII control characters plus U+007F, U+008F and U+009F are mapped to
//     an empty sequence,
//   * any rule keyed on U+FF5E is removed.
// Rules already present for these code points are overwritten. Always returns
// OkStatus.
// static
util::Status Builder::MergeNmtMap(Builder::CharsMap *chars_map) {
  // Other code points considered as whitespace.
  static constexpr char32 kMapToSpace[] = {
      0x0009,  // TAB
      0x000A,  // LINE FEED
      0x000C,  // FORM FEED
      0x000D,  // CARRIAGE RETURN
      0x1680,  // OGHAM SPACE MARK
      0x200B,  // ZERO WIDTH SPACE
      0x200E,  // LEFT-TO-RIGHT MARK
      0x200F,  // RIGHT-TO-LEFT MARK
      0x2028,  // LINE SEPARATOR
      0x2029,  // PARAGRAPH SEPARATOR
      0x2581,  // LOWER ONE EIGHT BLOCK
      0xFEFF,  // ZERO WIDTH NO-BREAK
      0xFFFD,  // REPLACEMENT CHARACTER
      0x200C,  // ZERO WIDTH NON-JOINER
      // 0x200D,  // ZERO WIDTH JOINER (rule disabled)
  };
  for (const char32 space_like : kMapToSpace) {
    (*chars_map)[{space_like}] = {0x20};
  }

  // ASCII control characters: U+0001..U+0008, U+000B and U+000E..U+001F.
  // U+0009, U+000A, U+000C and U+000D are handled as whitespace above.
  for (char32 ctrl = 0x0001; ctrl <= 0x0008; ++ctrl) {
    (*chars_map)[{ctrl}] = {};
  }
  (*chars_map)[{0x000B}] = {};
  for (char32 ctrl = 0x000E; ctrl <= 0x001F; ++ctrl) {
    (*chars_map)[{ctrl}] = {};
  }

  // <control-007F>..<control-009F>
  // NOTE(review): only these three code points are mapped, not the whole
  // U+007F..U+009F range -- confirm this is intended.
  static constexpr char32 kHighControls[] = {0x007F, 0x008F, 0x009F};
  for (const char32 ctrl : kHighControls) {
    (*chars_map)[{ctrl}] = {};
  }

  // FULL_WIDTH TILDE must stay unnormalized: it and HALF_WIDTH TILDE are used
  // differently in Japanese.
  chars_map->erase({0xFF5E});

  return util::OkStatus();
}
// static
util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
@ -496,6 +520,65 @@ util::Status Builder::BuildNFKDMap(CharsMap *chars_map) {
return util::OkStatus();
}
// Adds to `chars_map` one rule per code point whose ToNFD() result differs
// from the code point itself, i.e. the result has two or more characters or
// is a single, different character. U+0000 and code points rejected by
// U_IS_UNICODE_CHAR are not visited. Without ENABLE_NFKC_COMPILE only
// kCompileError is logged. Always returns OkStatus.
// static
util::Status Builder::BuildNFDMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
  constexpr int kLastCodePoint = 0x10FFFF;
  for (char32 c = 1; c <= kLastCodePoint; ++c) {
    if (U_IS_UNICODE_CHAR(c)) {
      const auto decomposed = ToNFD({c});
      // A one-character result equal to the input means "no change".
      const bool maps_to_itself = decomposed.size() == 1 && decomposed[0] == c;
      if (!decomposed.empty() && !maps_to_itself) {
        (*chars_map)[{c}] = decomposed;
      }
    }
  }
#else
  LOG(ERROR) << kCompileError;
#endif
  return util::OkStatus();
}
// Builds the NFKD map and merges the Unicode case-folding rules into it.
// The result is assembled in a local map and moved into `chars_map` last, so
// if either step exits early through RETURN_IF_ERROR the output is not
// assigned. Without ENABLE_NFKC_COMPILE only kCompileError is logged.
// static
util::Status Builder::BuildNFKD_CFMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
  CharsMap folded;
  RETURN_IF_ERROR(BuildNFKDMap(&folded));
  RETURN_IF_ERROR(MergeUnicodeCaseFoldMap(&folded));
  *chars_map = std::move(folded);
#else
  LOG(ERROR) << kCompileError;
#endif
  return util::OkStatus();
}
// Builds the NFC map and merges the Unicode case-folding rules into it.
// The result is assembled in a local map and moved into `chars_map` last, so
// if either step exits early through RETURN_IF_ERROR the output is not
// assigned. Without ENABLE_NFKC_COMPILE only kCompileError is logged.
// static
util::Status Builder::BuildNFC_CFMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
  CharsMap nfc_map;
  // The base map must be NFC. Starting from BuildNFKDMap here would make the
  // "nfc_cf" rules a duplicate of "nfkd_cf".
  RETURN_IF_ERROR(Builder::BuildNFCMap(&nfc_map));
  RETURN_IF_ERROR(Builder::MergeUnicodeCaseFoldMap(&nfc_map));
  *chars_map = std::move(nfc_map);
#else
  LOG(ERROR) << kCompileError;
#endif
  return util::OkStatus();
}
// Builds the NFD map and merges the Unicode case-folding rules into it.
// The result is assembled in a local map and moved into `chars_map` last, so
// if either step exits early through RETURN_IF_ERROR the output is not
// assigned. Without ENABLE_NFKC_COMPILE only kCompileError is logged.
// static
util::Status Builder::BuildNFD_CFMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
  CharsMap folded;
  RETURN_IF_ERROR(BuildNFDMap(&folded));
  RETURN_IF_ERROR(MergeUnicodeCaseFoldMap(&folded));
  *chars_map = std::move(folded);
#else
  LOG(ERROR) << kCompileError;
#endif
  return util::OkStatus();
}
// static
util::Status Builder::LoadCharsMap(absl::string_view filename,
CharsMap *chars_map) {

View File

@ -98,6 +98,8 @@ class Builder {
// Merge Unicode case folding mapping into `chars_map`.
static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
static util::Status MergeNmtMap(Builder::CharsMap *chars_map);
// Makes NFKC with Unicode case folding.
static util::Status BuildNFKC_CFMap(CharsMap *chars_map);
@ -107,6 +109,21 @@ class Builder {
// Given NFKC maps, convert them to NFKD.
static util::Status BuildNFKDMap(CharsMap *chars_map);
// Builds NFC map.
static util::Status BuildNFCMap(CharsMap *chars_map);
// Builds NFD map.
static util::Status BuildNFDMap(CharsMap *chars_map);
// Makes NFKD with Unicode case folding.
static util::Status BuildNFKD_CFMap(CharsMap *chars_map);
// Makes NFC with Unicode case folding.
static util::Status BuildNFC_CFMap(CharsMap *chars_map);
// Makes NFD with Unicode case folding.
static util::Status BuildNFD_CFMap(CharsMap *chars_map);
// Builds Chars map save in `filename`.
// Format:
// src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
@ -118,13 +135,13 @@ class Builder {
static util::Status SaveCharsMap(absl::string_view filename,
const CharsMap &chars_map);
private:
FRIEND_TEST(BuilderTest, RemoveRedundantMapTest);
// Removes redundant rules from `chars_map`.
// When char_maps have "aa" => "bb" and "a" => "b", the first
// rule is not necessary since the second rule can cover the first rule.
static util::Status RemoveRedundantMap(CharsMap *chars_map);
private:
FRIEND_TEST(BuilderTest, RemoveRedundantMapTest);
};
} // namespace normalizer
} // namespace sentencepiece

View File

@ -166,7 +166,12 @@ int main(int argc, char **argv) {
{"nmt_nfkc", Builder::BuildNmtNFKCMap},
{"nfkc_cf", Builder::BuildNFKC_CFMap},
{"nmt_nfkc_cf", Builder::BuildNmtNFKC_CFMap},
{"nfkd", Builder::BuildNFKDMap}};
{"nfkd", Builder::BuildNFKDMap},
{"nfc", Builder::BuildNFCMap},
{"nfd", Builder::BuildNFDMap},
{"nfkd_cf", Builder::BuildNFKD_CFMap},
{"nfc_cf", Builder::BuildNFC_CFMap},
{"nfd_cf", Builder::BuildNFD_CFMap}};
std::vector<std::pair<std::string, std::string>> data;
for (const auto &p : kRuleList) {
@ -181,7 +186,10 @@ int main(int argc, char **argv) {
CHECK_OK(Builder::SaveCharsMap(p.first + ".tsv", normalized_map));
// Do not make NFKD, NFD, and NFC maps (or their _cf variants) as they are optionally created.
if (p.first.find("nfkd") != std::string::npos) continue;
if (p.first == "nfkd" || p.first == "nfd" || p.first == "nfc" ||
p.first == "nfkd_cf" || p.first == "nfd_cf" || p.first == "nfc_cf") {
continue;
}
data.emplace_back(p.first, index);
}

File diff suppressed because it is too large Load Diff