Merge "Update SORTED_CHAR_MAP"
commit
e01e22e1ea
|
@ -45,18 +45,16 @@ struct LatinCapitalSmallPair {
|
||||||
|
|
||||||
extern "C" int main() {
|
extern "C" int main() {
|
||||||
for (unsigned short c = 0; c < 0xFFFF; c++) {
|
for (unsigned short c = 0; c < 0xFFFF; c++) {
|
||||||
const unsigned short baseC = c < NELEMS(BASE_CHARS) ? BASE_CHARS[c] : c;
|
if (c <= 0x7F) continue;
|
||||||
if (baseC <= 0x7F) continue;
|
const unsigned short icu4cLowerC = u_tolower(c);
|
||||||
const unsigned short icu4cLowerBaseC = u_tolower(baseC);
|
const unsigned short myLowerC = latin_tolower(c);
|
||||||
const unsigned short myLowerBaseC = latin_tolower(baseC);
|
if (c != icu4cLowerC) {
|
||||||
if (baseC != icu4cLowerBaseC) {
|
|
||||||
#ifdef CONFIRMING_CHAR_UTILS
|
#ifdef CONFIRMING_CHAR_UTILS
|
||||||
if (icu4cLowerBaseC != myLowerBaseC) {
|
if (icu4cLowerC != myLowerC) {
|
||||||
fprintf(stderr, "icu4cLowerBaseC != myLowerBaseC, 0x%04X, 0x%04X\n",
|
fprintf(stderr, "icu4cLowerC != myLowerC, 0x%04X, 0x%04X\n", icu4cLowerC, myLowerC);
|
||||||
icu4cLowerBaseC, myLowerBaseC);
|
|
||||||
}
|
}
|
||||||
#else // CONFIRMING_CHAR_UTILS
|
#else // CONFIRMING_CHAR_UTILS
|
||||||
printf("0x%04X, 0x%04X\n", baseC, icu4cLowerBaseC);
|
printf("0x%04X, 0x%04X\n", c, icu4cLowerC);
|
||||||
#endif // CONFIRMING_CHAR_UTILS
|
#endif // CONFIRMING_CHAR_UTILS
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -77,14 +75,99 @@ extern "C" int main() {
|
||||||
* $
|
* $
|
||||||
*/
|
*/
|
||||||
static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
|
{ 0x00C0, 0x00E0 }, // LATIN CAPITAL LETTER A WITH GRAVE
|
||||||
|
{ 0x00C1, 0x00E1 }, // LATIN CAPITAL LETTER A WITH ACUTE
|
||||||
|
{ 0x00C2, 0x00E2 }, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||||
|
{ 0x00C3, 0x00E3 }, // LATIN CAPITAL LETTER A WITH TILDE
|
||||||
|
{ 0x00C4, 0x00E4 }, // LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||||
|
{ 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||||
{ 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
|
{ 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
|
||||||
|
{ 0x00C7, 0x00E7 }, // LATIN CAPITAL LETTER C WITH CEDILLA
|
||||||
|
{ 0x00C8, 0x00E8 }, // LATIN CAPITAL LETTER E WITH GRAVE
|
||||||
|
{ 0x00C9, 0x00E9 }, // LATIN CAPITAL LETTER E WITH ACUTE
|
||||||
|
{ 0x00CA, 0x00EA }, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||||
|
{ 0x00CB, 0x00EB }, // LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||||
|
{ 0x00CC, 0x00EC }, // LATIN CAPITAL LETTER I WITH GRAVE
|
||||||
|
{ 0x00CD, 0x00ED }, // LATIN CAPITAL LETTER I WITH ACUTE
|
||||||
|
{ 0x00CE, 0x00EE }, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||||
|
{ 0x00CF, 0x00EF }, // LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||||
{ 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
|
{ 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
|
||||||
|
{ 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE
|
||||||
|
{ 0x00D2, 0x00F2 }, // LATIN CAPITAL LETTER O WITH GRAVE
|
||||||
|
{ 0x00D3, 0x00F3 }, // LATIN CAPITAL LETTER O WITH ACUTE
|
||||||
|
{ 0x00D4, 0x00F4 }, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||||
|
{ 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE
|
||||||
|
{ 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||||
|
{ 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE
|
||||||
|
{ 0x00D9, 0x00F9 }, // LATIN CAPITAL LETTER U WITH GRAVE
|
||||||
|
{ 0x00DA, 0x00FA }, // LATIN CAPITAL LETTER U WITH ACUTE
|
||||||
|
{ 0x00DB, 0x00FB }, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||||
|
{ 0x00DC, 0x00FC }, // LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||||
|
{ 0x00DD, 0x00FD }, // LATIN CAPITAL LETTER Y WITH ACUTE
|
||||||
{ 0x00DE, 0x00FE }, // LATIN CAPITAL LETTER THORN
|
{ 0x00DE, 0x00FE }, // LATIN CAPITAL LETTER THORN
|
||||||
|
{ 0x0100, 0x0101 }, // LATIN CAPITAL LETTER A WITH MACRON
|
||||||
|
{ 0x0102, 0x0103 }, // LATIN CAPITAL LETTER A WITH BREVE
|
||||||
|
{ 0x0104, 0x0105 }, // LATIN CAPITAL LETTER A WITH OGONEK
|
||||||
|
{ 0x0106, 0x0107 }, // LATIN CAPITAL LETTER C WITH ACUTE
|
||||||
|
{ 0x0108, 0x0109 }, // LATIN CAPITAL LETTER C WITH CIRCUMFLEX
|
||||||
|
{ 0x010A, 0x010B }, // LATIN CAPITAL LETTER C WITH DOT ABOVE
|
||||||
|
{ 0x010C, 0x010D }, // LATIN CAPITAL LETTER C WITH CARON
|
||||||
|
{ 0x010E, 0x010F }, // LATIN CAPITAL LETTER D WITH CARON
|
||||||
{ 0x0110, 0x0111 }, // LATIN CAPITAL LETTER D WITH STROKE
|
{ 0x0110, 0x0111 }, // LATIN CAPITAL LETTER D WITH STROKE
|
||||||
|
{ 0x0112, 0x0113 }, // LATIN CAPITAL LETTER E WITH MACRON
|
||||||
|
{ 0x0114, 0x0115 }, // LATIN CAPITAL LETTER E WITH BREVE
|
||||||
|
{ 0x0116, 0x0117 }, // LATIN CAPITAL LETTER E WITH DOT ABOVE
|
||||||
|
{ 0x0118, 0x0119 }, // LATIN CAPITAL LETTER E WITH OGONEK
|
||||||
|
{ 0x011A, 0x011B }, // LATIN CAPITAL LETTER E WITH CARON
|
||||||
|
{ 0x011C, 0x011D }, // LATIN CAPITAL LETTER G WITH CIRCUMFLEX
|
||||||
|
{ 0x011E, 0x011F }, // LATIN CAPITAL LETTER G WITH BREVE
|
||||||
|
{ 0x0120, 0x0121 }, // LATIN CAPITAL LETTER G WITH DOT ABOVE
|
||||||
|
{ 0x0122, 0x0123 }, // LATIN CAPITAL LETTER G WITH CEDILLA
|
||||||
|
{ 0x0124, 0x0125 }, // LATIN CAPITAL LETTER H WITH CIRCUMFLEX
|
||||||
{ 0x0126, 0x0127 }, // LATIN CAPITAL LETTER H WITH STROKE
|
{ 0x0126, 0x0127 }, // LATIN CAPITAL LETTER H WITH STROKE
|
||||||
|
{ 0x0128, 0x0129 }, // LATIN CAPITAL LETTER I WITH TILDE
|
||||||
|
{ 0x012A, 0x012B }, // LATIN CAPITAL LETTER I WITH MACRON
|
||||||
|
{ 0x012C, 0x012D }, // LATIN CAPITAL LETTER I WITH BREVE
|
||||||
|
{ 0x012E, 0x012F }, // LATIN CAPITAL LETTER I WITH OGONEK
|
||||||
|
{ 0x0130, 0x0069 }, // LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||||
|
{ 0x0132, 0x0133 }, // LATIN CAPITAL LIGATURE IJ
|
||||||
|
{ 0x0134, 0x0135 }, // LATIN CAPITAL LETTER J WITH CIRCUMFLEX
|
||||||
|
{ 0x0136, 0x0137 }, // LATIN CAPITAL LETTER K WITH CEDILLA
|
||||||
|
{ 0x0139, 0x013A }, // LATIN CAPITAL LETTER L WITH ACUTE
|
||||||
|
{ 0x013B, 0x013C }, // LATIN CAPITAL LETTER L WITH CEDILLA
|
||||||
|
{ 0x013D, 0x013E }, // LATIN CAPITAL LETTER L WITH CARON
|
||||||
|
{ 0x013F, 0x0140 }, // LATIN CAPITAL LETTER L WITH MIDDLE DOT
|
||||||
|
{ 0x0141, 0x0142 }, // LATIN CAPITAL LETTER L WITH STROKE
|
||||||
|
{ 0x0143, 0x0144 }, // LATIN CAPITAL LETTER N WITH ACUTE
|
||||||
|
{ 0x0145, 0x0146 }, // LATIN CAPITAL LETTER N WITH CEDILLA
|
||||||
|
{ 0x0147, 0x0148 }, // LATIN CAPITAL LETTER N WITH CARON
|
||||||
{ 0x014A, 0x014B }, // LATIN CAPITAL LETTER ENG
|
{ 0x014A, 0x014B }, // LATIN CAPITAL LETTER ENG
|
||||||
|
{ 0x014C, 0x014D }, // LATIN CAPITAL LETTER O WITH MACRON
|
||||||
|
{ 0x014E, 0x014F }, // LATIN CAPITAL LETTER O WITH BREVE
|
||||||
|
{ 0x0150, 0x0151 }, // LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
|
||||||
{ 0x0152, 0x0153 }, // LATIN CAPITAL LIGATURE OE
|
{ 0x0152, 0x0153 }, // LATIN CAPITAL LIGATURE OE
|
||||||
|
{ 0x0154, 0x0155 }, // LATIN CAPITAL LETTER R WITH ACUTE
|
||||||
|
{ 0x0156, 0x0157 }, // LATIN CAPITAL LETTER R WITH CEDILLA
|
||||||
|
{ 0x0158, 0x0159 }, // LATIN CAPITAL LETTER R WITH CARON
|
||||||
|
{ 0x015A, 0x015B }, // LATIN CAPITAL LETTER S WITH ACUTE
|
||||||
|
{ 0x015C, 0x015D }, // LATIN CAPITAL LETTER S WITH CIRCUMFLEX
|
||||||
|
{ 0x015E, 0x015F }, // LATIN CAPITAL LETTER S WITH CEDILLA
|
||||||
|
{ 0x0160, 0x0161 }, // LATIN CAPITAL LETTER S WITH CARON
|
||||||
|
{ 0x0162, 0x0163 }, // LATIN CAPITAL LETTER T WITH CEDILLA
|
||||||
|
{ 0x0164, 0x0165 }, // LATIN CAPITAL LETTER T WITH CARON
|
||||||
{ 0x0166, 0x0167 }, // LATIN CAPITAL LETTER T WITH STROKE
|
{ 0x0166, 0x0167 }, // LATIN CAPITAL LETTER T WITH STROKE
|
||||||
|
{ 0x0168, 0x0169 }, // LATIN CAPITAL LETTER U WITH TILDE
|
||||||
|
{ 0x016A, 0x016B }, // LATIN CAPITAL LETTER U WITH MACRON
|
||||||
|
{ 0x016C, 0x016D }, // LATIN CAPITAL LETTER U WITH BREVE
|
||||||
|
{ 0x016E, 0x016F }, // LATIN CAPITAL LETTER U WITH RING ABOVE
|
||||||
|
{ 0x0170, 0x0171 }, // LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||||
|
{ 0x0172, 0x0173 }, // LATIN CAPITAL LETTER U WITH OGONEK
|
||||||
|
{ 0x0174, 0x0175 }, // LATIN CAPITAL LETTER W WITH CIRCUMFLEX
|
||||||
|
{ 0x0176, 0x0177 }, // LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
|
||||||
|
{ 0x0178, 0x00FF }, // LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||||
|
{ 0x0179, 0x017A }, // LATIN CAPITAL LETTER Z WITH ACUTE
|
||||||
|
{ 0x017B, 0x017C }, // LATIN CAPITAL LETTER Z WITH DOT ABOVE
|
||||||
|
{ 0x017D, 0x017E }, // LATIN CAPITAL LETTER Z WITH CARON
|
||||||
{ 0x0181, 0x0253 }, // LATIN CAPITAL LETTER B WITH HOOK
|
{ 0x0181, 0x0253 }, // LATIN CAPITAL LETTER B WITH HOOK
|
||||||
{ 0x0182, 0x0183 }, // LATIN CAPITAL LETTER B WITH TOPBAR
|
{ 0x0182, 0x0183 }, // LATIN CAPITAL LETTER B WITH TOPBAR
|
||||||
{ 0x0184, 0x0185 }, // LATIN CAPITAL LETTER TONE SIX
|
{ 0x0184, 0x0185 }, // LATIN CAPITAL LETTER TONE SIX
|
||||||
|
@ -105,6 +188,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x019C, 0x026F }, // LATIN CAPITAL LETTER TURNED M
|
{ 0x019C, 0x026F }, // LATIN CAPITAL LETTER TURNED M
|
||||||
{ 0x019D, 0x0272 }, // LATIN CAPITAL LETTER N WITH LEFT HOOK
|
{ 0x019D, 0x0272 }, // LATIN CAPITAL LETTER N WITH LEFT HOOK
|
||||||
{ 0x019F, 0x0275 }, // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
{ 0x019F, 0x0275 }, // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
|
||||||
|
{ 0x01A0, 0x01A1 }, // LATIN CAPITAL LETTER O WITH HORN
|
||||||
{ 0x01A2, 0x01A3 }, // LATIN CAPITAL LETTER OI
|
{ 0x01A2, 0x01A3 }, // LATIN CAPITAL LETTER OI
|
||||||
{ 0x01A4, 0x01A5 }, // LATIN CAPITAL LETTER P WITH HOOK
|
{ 0x01A4, 0x01A5 }, // LATIN CAPITAL LETTER P WITH HOOK
|
||||||
{ 0x01A6, 0x0280 }, // LATIN LETTER YR
|
{ 0x01A6, 0x0280 }, // LATIN LETTER YR
|
||||||
|
@ -112,6 +196,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x01A9, 0x0283 }, // LATIN CAPITAL LETTER ESH
|
{ 0x01A9, 0x0283 }, // LATIN CAPITAL LETTER ESH
|
||||||
{ 0x01AC, 0x01AD }, // LATIN CAPITAL LETTER T WITH HOOK
|
{ 0x01AC, 0x01AD }, // LATIN CAPITAL LETTER T WITH HOOK
|
||||||
{ 0x01AE, 0x0288 }, // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
{ 0x01AE, 0x0288 }, // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
|
||||||
|
{ 0x01AF, 0x01B0 }, // LATIN CAPITAL LETTER U WITH HORN
|
||||||
{ 0x01B1, 0x028A }, // LATIN CAPITAL LETTER UPSILON
|
{ 0x01B1, 0x028A }, // LATIN CAPITAL LETTER UPSILON
|
||||||
{ 0x01B2, 0x028B }, // LATIN CAPITAL LETTER V WITH HOOK
|
{ 0x01B2, 0x028B }, // LATIN CAPITAL LETTER V WITH HOOK
|
||||||
{ 0x01B3, 0x01B4 }, // LATIN CAPITAL LETTER Y WITH HOOK
|
{ 0x01B3, 0x01B4 }, // LATIN CAPITAL LETTER Y WITH HOOK
|
||||||
|
@ -119,13 +204,64 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x01B7, 0x0292 }, // LATIN CAPITAL LETTER EZH
|
{ 0x01B7, 0x0292 }, // LATIN CAPITAL LETTER EZH
|
||||||
{ 0x01B8, 0x01B9 }, // LATIN CAPITAL LETTER EZH REVERSED
|
{ 0x01B8, 0x01B9 }, // LATIN CAPITAL LETTER EZH REVERSED
|
||||||
{ 0x01BC, 0x01BD }, // LATIN CAPITAL LETTER TONE FIVE
|
{ 0x01BC, 0x01BD }, // LATIN CAPITAL LETTER TONE FIVE
|
||||||
|
{ 0x01C4, 0x01C6 }, // LATIN CAPITAL LETTER DZ WITH CARON
|
||||||
|
{ 0x01C5, 0x01C6 }, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
|
||||||
|
{ 0x01C7, 0x01C9 }, // LATIN CAPITAL LETTER LJ
|
||||||
|
{ 0x01C8, 0x01C9 }, // LATIN CAPITAL LETTER L WITH SMALL LETTER J
|
||||||
|
{ 0x01CA, 0x01CC }, // LATIN CAPITAL LETTER NJ
|
||||||
|
{ 0x01CB, 0x01CC }, // LATIN CAPITAL LETTER N WITH SMALL LETTER J
|
||||||
|
{ 0x01CD, 0x01CE }, // LATIN CAPITAL LETTER A WITH CARON
|
||||||
|
{ 0x01CF, 0x01D0 }, // LATIN CAPITAL LETTER I WITH CARON
|
||||||
|
{ 0x01D1, 0x01D2 }, // LATIN CAPITAL LETTER O WITH CARON
|
||||||
|
{ 0x01D3, 0x01D4 }, // LATIN CAPITAL LETTER U WITH CARON
|
||||||
|
{ 0x01D5, 0x01D6 }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
|
||||||
|
{ 0x01D7, 0x01D8 }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
|
||||||
|
{ 0x01D9, 0x01DA }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
|
||||||
|
{ 0x01DB, 0x01DC }, // LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
|
||||||
|
{ 0x01DE, 0x01DF }, // LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
|
||||||
|
{ 0x01E0, 0x01E1 }, // LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
|
||||||
|
{ 0x01E2, 0x01E3 }, // LATIN CAPITAL LETTER AE WITH MACRON
|
||||||
{ 0x01E4, 0x01E5 }, // LATIN CAPITAL LETTER G WITH STROKE
|
{ 0x01E4, 0x01E5 }, // LATIN CAPITAL LETTER G WITH STROKE
|
||||||
|
{ 0x01E6, 0x01E7 }, // LATIN CAPITAL LETTER G WITH CARON
|
||||||
|
{ 0x01E8, 0x01E9 }, // LATIN CAPITAL LETTER K WITH CARON
|
||||||
|
{ 0x01EA, 0x01EB }, // LATIN CAPITAL LETTER O WITH OGONEK
|
||||||
|
{ 0x01EC, 0x01ED }, // LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
|
||||||
|
{ 0x01EE, 0x01EF }, // LATIN CAPITAL LETTER EZH WITH CARON
|
||||||
|
{ 0x01F1, 0x01F3 }, // LATIN CAPITAL LETTER DZ
|
||||||
|
{ 0x01F2, 0x01F3 }, // LATIN CAPITAL LETTER D WITH SMALL LETTER Z
|
||||||
|
{ 0x01F4, 0x01F5 }, // LATIN CAPITAL LETTER G WITH ACUTE
|
||||||
{ 0x01F6, 0x0195 }, // LATIN CAPITAL LETTER HWAIR
|
{ 0x01F6, 0x0195 }, // LATIN CAPITAL LETTER HWAIR
|
||||||
{ 0x01F7, 0x01BF }, // LATIN CAPITAL LETTER WYNN
|
{ 0x01F7, 0x01BF }, // LATIN CAPITAL LETTER WYNN
|
||||||
|
{ 0x01F8, 0x01F9 }, // LATIN CAPITAL LETTER N WITH GRAVE
|
||||||
|
{ 0x01FA, 0x01FB }, // LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
|
||||||
|
{ 0x01FC, 0x01FD }, // LATIN CAPITAL LETTER AE WITH ACUTE
|
||||||
|
{ 0x01FE, 0x01FF }, // LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
|
||||||
|
{ 0x0200, 0x0201 }, // LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
|
||||||
|
{ 0x0202, 0x0203 }, // LATIN CAPITAL LETTER A WITH INVERTED BREVE
|
||||||
|
{ 0x0204, 0x0205 }, // LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
|
||||||
|
{ 0x0206, 0x0207 }, // LATIN CAPITAL LETTER E WITH INVERTED BREVE
|
||||||
|
{ 0x0208, 0x0209 }, // LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
|
||||||
|
{ 0x020A, 0x020B }, // LATIN CAPITAL LETTER I WITH INVERTED BREVE
|
||||||
|
{ 0x020C, 0x020D }, // LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
|
||||||
|
{ 0x020E, 0x020F }, // LATIN CAPITAL LETTER O WITH INVERTED BREVE
|
||||||
|
{ 0x0210, 0x0211 }, // LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
|
||||||
|
{ 0x0212, 0x0213 }, // LATIN CAPITAL LETTER R WITH INVERTED BREVE
|
||||||
|
{ 0x0214, 0x0215 }, // LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
|
||||||
|
{ 0x0216, 0x0217 }, // LATIN CAPITAL LETTER U WITH INVERTED BREVE
|
||||||
|
{ 0x0218, 0x0219 }, // LATIN CAPITAL LETTER S WITH COMMA BELOW
|
||||||
|
{ 0x021A, 0x021B }, // LATIN CAPITAL LETTER T WITH COMMA BELOW
|
||||||
{ 0x021C, 0x021D }, // LATIN CAPITAL LETTER YOGH
|
{ 0x021C, 0x021D }, // LATIN CAPITAL LETTER YOGH
|
||||||
|
{ 0x021E, 0x021F }, // LATIN CAPITAL LETTER H WITH CARON
|
||||||
{ 0x0220, 0x019E }, // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
|
{ 0x0220, 0x019E }, // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
|
||||||
{ 0x0222, 0x0223 }, // LATIN CAPITAL LETTER OU
|
{ 0x0222, 0x0223 }, // LATIN CAPITAL LETTER OU
|
||||||
{ 0x0224, 0x0225 }, // LATIN CAPITAL LETTER Z WITH HOOK
|
{ 0x0224, 0x0225 }, // LATIN CAPITAL LETTER Z WITH HOOK
|
||||||
|
{ 0x0226, 0x0227 }, // LATIN CAPITAL LETTER A WITH DOT ABOVE
|
||||||
|
{ 0x0228, 0x0229 }, // LATIN CAPITAL LETTER E WITH CEDILLA
|
||||||
|
{ 0x022A, 0x022B }, // LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
|
||||||
|
{ 0x022C, 0x022D }, // LATIN CAPITAL LETTER O WITH TILDE AND MACRON
|
||||||
|
{ 0x022E, 0x022F }, // LATIN CAPITAL LETTER O WITH DOT ABOVE
|
||||||
|
{ 0x0230, 0x0231 }, // LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
|
||||||
|
{ 0x0232, 0x0233 }, // LATIN CAPITAL LETTER Y WITH MACRON
|
||||||
{ 0x023A, 0x2C65 }, // LATIN CAPITAL LETTER A WITH STROKE
|
{ 0x023A, 0x2C65 }, // LATIN CAPITAL LETTER A WITH STROKE
|
||||||
{ 0x023B, 0x023C }, // LATIN CAPITAL LETTER C WITH STROKE
|
{ 0x023B, 0x023C }, // LATIN CAPITAL LETTER C WITH STROKE
|
||||||
{ 0x023D, 0x019A }, // LATIN CAPITAL LETTER L WITH BAR
|
{ 0x023D, 0x019A }, // LATIN CAPITAL LETTER L WITH BAR
|
||||||
|
@ -142,6 +278,13 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x0370, 0x0371 }, // GREEK CAPITAL LETTER HETA
|
{ 0x0370, 0x0371 }, // GREEK CAPITAL LETTER HETA
|
||||||
{ 0x0372, 0x0373 }, // GREEK CAPITAL LETTER ARCHAIC SAMPI
|
{ 0x0372, 0x0373 }, // GREEK CAPITAL LETTER ARCHAIC SAMPI
|
||||||
{ 0x0376, 0x0377 }, // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
|
{ 0x0376, 0x0377 }, // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
|
||||||
|
{ 0x0386, 0x03AC }, // GREEK CAPITAL LETTER ALPHA WITH TONOS
|
||||||
|
{ 0x0388, 0x03AD }, // GREEK CAPITAL LETTER EPSILON WITH TONOS
|
||||||
|
{ 0x0389, 0x03AE }, // GREEK CAPITAL LETTER ETA WITH TONOS
|
||||||
|
{ 0x038A, 0x03AF }, // GREEK CAPITAL LETTER IOTA WITH TONOS
|
||||||
|
{ 0x038C, 0x03CC }, // GREEK CAPITAL LETTER OMICRON WITH TONOS
|
||||||
|
{ 0x038E, 0x03CD }, // GREEK CAPITAL LETTER UPSILON WITH TONOS
|
||||||
|
{ 0x038F, 0x03CE }, // GREEK CAPITAL LETTER OMEGA WITH TONOS
|
||||||
{ 0x0391, 0x03B1 }, // GREEK CAPITAL LETTER ALPHA
|
{ 0x0391, 0x03B1 }, // GREEK CAPITAL LETTER ALPHA
|
||||||
{ 0x0392, 0x03B2 }, // GREEK CAPITAL LETTER BETA
|
{ 0x0392, 0x03B2 }, // GREEK CAPITAL LETTER BETA
|
||||||
{ 0x0393, 0x03B3 }, // GREEK CAPITAL LETTER GAMMA
|
{ 0x0393, 0x03B3 }, // GREEK CAPITAL LETTER GAMMA
|
||||||
|
@ -166,6 +309,8 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x03A7, 0x03C7 }, // GREEK CAPITAL LETTER CHI
|
{ 0x03A7, 0x03C7 }, // GREEK CAPITAL LETTER CHI
|
||||||
{ 0x03A8, 0x03C8 }, // GREEK CAPITAL LETTER PSI
|
{ 0x03A8, 0x03C8 }, // GREEK CAPITAL LETTER PSI
|
||||||
{ 0x03A9, 0x03C9 }, // GREEK CAPITAL LETTER OMEGA
|
{ 0x03A9, 0x03C9 }, // GREEK CAPITAL LETTER OMEGA
|
||||||
|
{ 0x03AA, 0x03CA }, // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
|
||||||
|
{ 0x03AB, 0x03CB }, // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
|
||||||
{ 0x03CF, 0x03D7 }, // GREEK CAPITAL KAI SYMBOL
|
{ 0x03CF, 0x03D7 }, // GREEK CAPITAL KAI SYMBOL
|
||||||
{ 0x03D8, 0x03D9 }, // GREEK LETTER ARCHAIC KOPPA
|
{ 0x03D8, 0x03D9 }, // GREEK LETTER ARCHAIC KOPPA
|
||||||
{ 0x03DA, 0x03DB }, // GREEK LETTER STIGMA
|
{ 0x03DA, 0x03DB }, // GREEK LETTER STIGMA
|
||||||
|
@ -179,19 +324,28 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x03EA, 0x03EB }, // COPTIC CAPITAL LETTER GANGIA
|
{ 0x03EA, 0x03EB }, // COPTIC CAPITAL LETTER GANGIA
|
||||||
{ 0x03EC, 0x03ED }, // COPTIC CAPITAL LETTER SHIMA
|
{ 0x03EC, 0x03ED }, // COPTIC CAPITAL LETTER SHIMA
|
||||||
{ 0x03EE, 0x03EF }, // COPTIC CAPITAL LETTER DEI
|
{ 0x03EE, 0x03EF }, // COPTIC CAPITAL LETTER DEI
|
||||||
|
{ 0x03F4, 0x03B8 }, // GREEK CAPITAL THETA SYMBOL
|
||||||
{ 0x03F7, 0x03F8 }, // GREEK CAPITAL LETTER SHO
|
{ 0x03F7, 0x03F8 }, // GREEK CAPITAL LETTER SHO
|
||||||
|
{ 0x03F9, 0x03F2 }, // GREEK CAPITAL LUNATE SIGMA SYMBOL
|
||||||
{ 0x03FA, 0x03FB }, // GREEK CAPITAL LETTER SAN
|
{ 0x03FA, 0x03FB }, // GREEK CAPITAL LETTER SAN
|
||||||
{ 0x03FD, 0x037B }, // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
|
{ 0x03FD, 0x037B }, // GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
|
||||||
{ 0x03FE, 0x037C }, // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
|
{ 0x03FE, 0x037C }, // GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
|
||||||
{ 0x03FF, 0x037D }, // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
|
{ 0x03FF, 0x037D }, // GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
|
||||||
|
{ 0x0400, 0x0450 }, // CYRILLIC CAPITAL LETTER IE WITH GRAVE
|
||||||
|
{ 0x0401, 0x0451 }, // CYRILLIC CAPITAL LETTER IO
|
||||||
{ 0x0402, 0x0452 }, // CYRILLIC CAPITAL LETTER DJE
|
{ 0x0402, 0x0452 }, // CYRILLIC CAPITAL LETTER DJE
|
||||||
|
{ 0x0403, 0x0453 }, // CYRILLIC CAPITAL LETTER GJE
|
||||||
{ 0x0404, 0x0454 }, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
{ 0x0404, 0x0454 }, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||||
{ 0x0405, 0x0455 }, // CYRILLIC CAPITAL LETTER DZE
|
{ 0x0405, 0x0455 }, // CYRILLIC CAPITAL LETTER DZE
|
||||||
{ 0x0406, 0x0456 }, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
{ 0x0406, 0x0456 }, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||||
|
{ 0x0407, 0x0457 }, // CYRILLIC CAPITAL LETTER YI
|
||||||
{ 0x0408, 0x0458 }, // CYRILLIC CAPITAL LETTER JE
|
{ 0x0408, 0x0458 }, // CYRILLIC CAPITAL LETTER JE
|
||||||
{ 0x0409, 0x0459 }, // CYRILLIC CAPITAL LETTER LJE
|
{ 0x0409, 0x0459 }, // CYRILLIC CAPITAL LETTER LJE
|
||||||
{ 0x040A, 0x045A }, // CYRILLIC CAPITAL LETTER NJE
|
{ 0x040A, 0x045A }, // CYRILLIC CAPITAL LETTER NJE
|
||||||
{ 0x040B, 0x045B }, // CYRILLIC CAPITAL LETTER TSHE
|
{ 0x040B, 0x045B }, // CYRILLIC CAPITAL LETTER TSHE
|
||||||
|
{ 0x040C, 0x045C }, // CYRILLIC CAPITAL LETTER KJE
|
||||||
|
{ 0x040D, 0x045D }, // CYRILLIC CAPITAL LETTER I WITH GRAVE
|
||||||
|
{ 0x040E, 0x045E }, // CYRILLIC CAPITAL LETTER SHORT U
|
||||||
{ 0x040F, 0x045F }, // CYRILLIC CAPITAL LETTER DZHE
|
{ 0x040F, 0x045F }, // CYRILLIC CAPITAL LETTER DZHE
|
||||||
{ 0x0410, 0x0430 }, // CYRILLIC CAPITAL LETTER A
|
{ 0x0410, 0x0430 }, // CYRILLIC CAPITAL LETTER A
|
||||||
{ 0x0411, 0x0431 }, // CYRILLIC CAPITAL LETTER BE
|
{ 0x0411, 0x0431 }, // CYRILLIC CAPITAL LETTER BE
|
||||||
|
@ -236,6 +390,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x0470, 0x0471 }, // CYRILLIC CAPITAL LETTER PSI
|
{ 0x0470, 0x0471 }, // CYRILLIC CAPITAL LETTER PSI
|
||||||
{ 0x0472, 0x0473 }, // CYRILLIC CAPITAL LETTER FITA
|
{ 0x0472, 0x0473 }, // CYRILLIC CAPITAL LETTER FITA
|
||||||
{ 0x0474, 0x0475 }, // CYRILLIC CAPITAL LETTER IZHITSA
|
{ 0x0474, 0x0475 }, // CYRILLIC CAPITAL LETTER IZHITSA
|
||||||
|
{ 0x0476, 0x0477 }, // CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
|
||||||
{ 0x0478, 0x0479 }, // CYRILLIC CAPITAL LETTER UK
|
{ 0x0478, 0x0479 }, // CYRILLIC CAPITAL LETTER UK
|
||||||
{ 0x047A, 0x047B }, // CYRILLIC CAPITAL LETTER ROUND OMEGA
|
{ 0x047A, 0x047B }, // CYRILLIC CAPITAL LETTER ROUND OMEGA
|
||||||
{ 0x047C, 0x047D }, // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
{ 0x047C, 0x047D }, // CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
|
||||||
|
@ -269,17 +424,34 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
|
||||||
{ 0x04BC, 0x04BD }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
{ 0x04BC, 0x04BD }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE
|
||||||
{ 0x04BE, 0x04BF }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
{ 0x04BE, 0x04BF }, // CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
|
||||||
{ 0x04C0, 0x04CF }, // CYRILLIC LETTER PALOCHKA
|
{ 0x04C0, 0x04CF }, // CYRILLIC LETTER PALOCHKA
|
||||||
|
{ 0x04C1, 0x04C2 }, // CYRILLIC CAPITAL LETTER ZHE WITH BREVE
|
||||||
{ 0x04C3, 0x04C4 }, // CYRILLIC CAPITAL LETTER KA WITH HOOK
|
{ 0x04C3, 0x04C4 }, // CYRILLIC CAPITAL LETTER KA WITH HOOK
|
||||||
{ 0x04C5, 0x04C6 }, // CYRILLIC CAPITAL LETTER EL WITH TAIL
|
{ 0x04C5, 0x04C6 }, // CYRILLIC CAPITAL LETTER EL WITH TAIL
|
||||||
{ 0x04C7, 0x04C8 }, // CYRILLIC CAPITAL LETTER EN WITH HOOK
|
{ 0x04C7, 0x04C8 }, // CYRILLIC CAPITAL LETTER EN WITH HOOK
|
||||||
{ 0x04C9, 0x04CA }, // CYRILLIC CAPITAL LETTER EN WITH TAIL
|
{ 0x04C9, 0x04CA }, // CYRILLIC CAPITAL LETTER EN WITH TAIL
|
||||||
{ 0x04CB, 0x04CC }, // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
{ 0x04CB, 0x04CC }, // CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
|
||||||
{ 0x04CD, 0x04CE }, // CYRILLIC CAPITAL LETTER EM WITH TAIL
|
{ 0x04CD, 0x04CE }, // CYRILLIC CAPITAL LETTER EM WITH TAIL
|
||||||
|
{ 0x04D0, 0x04D1 }, // CYRILLIC CAPITAL LETTER A WITH BREVE
|
||||||
|
{ 0x04D2, 0x04D3 }, // CYRILLIC CAPITAL LETTER A WITH DIAERESIS
|
||||||
{ 0x04D4, 0x04D5 }, // CYRILLIC CAPITAL LIGATURE A IE
|
{ 0x04D4, 0x04D5 }, // CYRILLIC CAPITAL LIGATURE A IE
|
||||||
|
{ 0x04D6, 0x04D7 }, // CYRILLIC CAPITAL LETTER IE WITH BREVE
|
||||||
{ 0x04D8, 0x04D9 }, // CYRILLIC CAPITAL LETTER SCHWA
|
{ 0x04D8, 0x04D9 }, // CYRILLIC CAPITAL LETTER SCHWA
|
||||||
|
{ 0x04DA, 0x04DB }, // CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
|
||||||
|
{ 0x04DC, 0x04DD }, // CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
|
||||||
|
{ 0x04DE, 0x04DF }, // CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
|
||||||
{ 0x04E0, 0x04E1 }, // CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
{ 0x04E0, 0x04E1 }, // CYRILLIC CAPITAL LETTER ABKHASIAN DZE
|
||||||
|
{ 0x04E2, 0x04E3 }, // CYRILLIC CAPITAL LETTER I WITH MACRON
|
||||||
|
{ 0x04E4, 0x04E5 }, // CYRILLIC CAPITAL LETTER I WITH DIAERESIS
|
||||||
|
{ 0x04E6, 0x04E7 }, // CYRILLIC CAPITAL LETTER O WITH DIAERESIS
|
||||||
{ 0x04E8, 0x04E9 }, // CYRILLIC CAPITAL LETTER BARRED O
|
{ 0x04E8, 0x04E9 }, // CYRILLIC CAPITAL LETTER BARRED O
|
||||||
|
{ 0x04EA, 0x04EB }, // CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
|
||||||
|
{ 0x04EC, 0x04ED }, // CYRILLIC CAPITAL LETTER E WITH DIAERESIS
|
||||||
|
{ 0x04EE, 0x04EF }, // CYRILLIC CAPITAL LETTER U WITH MACRON
|
||||||
|
{ 0x04F0, 0x04F1 }, // CYRILLIC CAPITAL LETTER U WITH DIAERESIS
|
||||||
|
{ 0x04F2, 0x04F3 }, // CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
|
||||||
|
{ 0x04F4, 0x04F5 }, // CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
|
||||||
{ 0x04F6, 0x04F7 }, // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
|
{ 0x04F6, 0x04F7 }, // CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
|
||||||
|
{ 0x04F8, 0x04F9 }, // CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
|
||||||
{ 0x04FA, 0x04FB }, // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
|
{ 0x04FA, 0x04FB }, // CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
|
||||||
{ 0x04FC, 0x04FD }, // CYRILLIC CAPITAL LETTER HA WITH HOOK
|
{ 0x04FC, 0x04FD }, // CYRILLIC CAPITAL LETTER HA WITH HOOK
|
||||||
{ 0x04FE, 0x04FF }, // CYRILLIC CAPITAL LETTER HA WITH STROKE
|
{ 0x04FE, 0x04FF }, // CYRILLIC CAPITAL LETTER HA WITH STROKE
|
||||||
|
|
|
@ -58,7 +58,8 @@ inline static int toBaseCodePoint(int c) {
|
||||||
AK_FORCE_INLINE static int toLowerCase(const int c) {
|
AK_FORCE_INLINE static int toLowerCase(const int c) {
|
||||||
if (isAsciiUpper(c)) {
|
if (isAsciiUpper(c)) {
|
||||||
return toAsciiLower(c);
|
return toAsciiLower(c);
|
||||||
} else if (isAscii(c)) {
|
}
|
||||||
|
if (isAscii(c)) {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
|
return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
|
||||||
|
|
Loading…
Reference in New Issue