Fix lower case conversion bug for some characters

Bug: 7232296
Change-Id: Iaf3f6be55f1bdc2294533938bb54fedcf25fb0cb
main
Tom Ouyang 2012-09-25 17:04:35 -07:00
parent a161bdac88
commit edd5b7365f
2 changed files with 5 additions and 1 deletion

View File

@ -88,6 +88,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE { 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE
{ 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE { 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE
{ 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH { 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH
{ 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE
{ 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE { 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE
{ 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS { 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS
{ 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE { 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE
@ -219,6 +220,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = {
{ 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE { 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE
{ 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE { 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE
{ 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I { 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I
{ 0x0419, 0x0439 }, // CYRILLIC CAPITAL LETTER SHORT I
{ 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA { 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA
{ 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL { 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL
{ 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM { 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM

View File

@ -23,7 +23,9 @@
namespace latinime { namespace latinime {
inline static bool isAsciiUpper(unsigned short c) { inline static bool isAsciiUpper(unsigned short c) {
return isupper(static_cast<int>(c)) != 0; // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
// be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
return (c >= 'A' && c <= 'Z');
} }
inline static unsigned short toAsciiLower(unsigned short c) { inline static unsigned short toAsciiLower(unsigned short c) {