From edd5b7365f4ed709426cd685d5506b9461c9a2f9 Mon Sep 17 00:00:00 2001 From: Tom Ouyang Date: Tue, 25 Sep 2012 17:04:35 -0700 Subject: [PATCH] Fix lower case conversion bug for some characters Bug: 7232296 Change-Id: Iaf3f6be55f1bdc2294533938bb54fedcf25fb0cb --- native/jni/src/char_utils.cpp | 2 ++ native/jni/src/char_utils.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/char_utils.cpp index 9d886da31..d0547a982 100644 --- a/native/jni/src/char_utils.cpp +++ b/native/jni/src/char_utils.cpp @@ -88,6 +88,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = { { 0x00C5, 0x00E5 }, // LATIN CAPITAL LETTER A WITH RING ABOVE { 0x00C6, 0x00E6 }, // LATIN CAPITAL LETTER AE { 0x00D0, 0x00F0 }, // LATIN CAPITAL LETTER ETH + { 0x00D1, 0x00F1 }, // LATIN CAPITAL LETTER N WITH TILDE { 0x00D5, 0x00F5 }, // LATIN CAPITAL LETTER O WITH TILDE { 0x00D6, 0x00F6 }, // LATIN CAPITAL LETTER O WITH DIAERESIS { 0x00D8, 0x00F8 }, // LATIN CAPITAL LETTER O WITH STROKE @@ -219,6 +220,7 @@ static const struct LatinCapitalSmallPair SORTED_CHAR_MAP[] = { { 0x0416, 0x0436 }, // CYRILLIC CAPITAL LETTER ZHE { 0x0417, 0x0437 }, // CYRILLIC CAPITAL LETTER ZE { 0x0418, 0x0438 }, // CYRILLIC CAPITAL LETTER I + { 0x0419, 0x0439 }, // CYRILLIC CAPITAL LETTER SHORT I { 0x041A, 0x043A }, // CYRILLIC CAPITAL LETTER KA { 0x041B, 0x043B }, // CYRILLIC CAPITAL LETTER EL { 0x041C, 0x043C }, // CYRILLIC CAPITAL LETTER EM diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h index b17f262ec..20cf2e8b5 100644 --- a/native/jni/src/char_utils.h +++ b/native/jni/src/char_utils.h @@ -23,7 +23,9 @@ namespace latinime { inline static bool isAsciiUpper(unsigned short c) { - return isupper(static_cast<int>(c)) != 0; + // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to + // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). 
+ return (c >= 'A' && c <= 'Z'); } inline static unsigned short toAsciiLower(unsigned short c) {