diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 34b352433..1b758c680 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -47,7 +47,6 @@ LATIN_IME_JNI_SRC_FILES := \ LATIN_IME_CORE_SRC_FILES := \ bigram_dictionary.cpp \ - char_utils.cpp \ correction.cpp \ dic_traverse_wrapper.cpp \ unigram_dictionary.cpp \ @@ -58,6 +57,7 @@ LATIN_IME_CORE_SRC_FILES := \ dic_node_utils.cpp \ dic_nodes_cache.cpp) \ $(addprefix suggest/core/dictionary/, \ + char_utils.cpp \ dictionary.cpp \ digraph_utils.cpp) \ $(addprefix suggest/core/layout/, \ diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp index c592542bd..ebe27994f 100644 --- a/native/jni/src/bigram_dictionary.cpp +++ b/native/jni/src/bigram_dictionary.cpp @@ -20,10 +20,10 @@ #include "bigram_dictionary.h" -#include "char_utils.h" #include "defines.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/bloom_filter.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/dictionary/dictionary.h" namespace latinime { @@ -52,7 +52,7 @@ void BigramDictionary::addWordBigram(int *word, int length, int probability, int int insertAt = 0; while (insertAt < MAX_RESULTS) { if (probability > bigramProbability[insertAt] || (bigramProbability[insertAt] == probability - && length < getCodePointCount(MAX_WORD_LENGTH, + && length < CharUtils::getCodePointCount(MAX_WORD_LENGTH, bigramCodePoints + insertAt * MAX_WORD_LENGTH))) { break; } @@ -196,9 +196,9 @@ bool BigramDictionary::checkFirstCharacter(int *word, int *inputCodePoints) cons // what user typed. int maxAlt = MAX_ALTERNATIVES; - const int firstBaseLowerCodePoint = toBaseLowerCase(*word); + const int firstBaseLowerCodePoint = CharUtils::toBaseLowerCase(*word); while (maxAlt > 0) { - if (toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { + if (CharUtils::toBaseLowerCase(*inputCodePoints) == firstBaseLowerCodePoint) { return true; } inputCodePoints++; diff --git a/native/jni/src/char_utils.h b/native/jni/src/char_utils.h deleted file mode 100644 index b429f40b2..000000000 --- a/native/jni/src/char_utils.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef LATINIME_CHAR_UTILS_H -#define LATINIME_CHAR_UTILS_H - -#include - -#include "defines.h" - -namespace latinime { - -inline static bool isAsciiUpper(int c) { - // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to - // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). - return (c >= 'A' && c <= 'Z'); -} - -inline static int toAsciiLower(int c) { - return c - 'A' + 'a'; -} - -inline static bool isAscii(int c) { - return isascii(c) != 0; -} - -unsigned short latin_tolower(const unsigned short c); - -/** - * Table mapping most combined Latin, Greek, and Cyrillic characters - * to their base characters. If c is in range, BASE_CHARS[c] == c - * if c is not a combined character, or the base character if it - * is combined. - */ -static const int BASE_CHARS_SIZE = 0x0500; -extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; - -inline static int toBaseCodePoint(int c) { - if (c < BASE_CHARS_SIZE) { - return static_cast(BASE_CHARS[c]); - } - return c; -} - -AK_FORCE_INLINE static int toLowerCase(const int c) { - if (isAsciiUpper(c)) { - return toAsciiLower(c); - } - if (isAscii(c)) { - return c; - } - return static_cast(latin_tolower(static_cast(c))); -} - -AK_FORCE_INLINE static int toBaseLowerCase(const int c) { - return toLowerCase(toBaseCodePoint(c)); -} - -inline static bool isIntentionalOmissionCodePoint(const int codePoint) { - // TODO: Do not hardcode here - return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS; -} - -inline static int getCodePointCount(const int arraySize, const int *const codePoints) { - int size = 0; - for (; size < arraySize; ++size) { - if (codePoints[size] == '\0') { - break; - } - } - return size; -} - -} // namespace latinime -#endif // LATINIME_CHAR_UTILS_H diff --git a/native/jni/src/correction.cpp b/native/jni/src/correction.cpp index e2ad557c5..3dc2f3748 100644 --- a/native/jni/src/correction.cpp +++ b/native/jni/src/correction.cpp @@ -18,9 +18,9 @@ #include -#include "char_utils.h" #include "correction.h" #include "defines.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/layout/proximity_info_state.h" #include "suggest/core/layout/touch_position_correction_utils.h" #include "suggest/policyimpl/utils/edit_distance.h" @@ -528,7 +528,7 @@ inline static int getQuoteCount(const int *word, const int length) { } inline static bool isUpperCase(unsigned short c) { - return isAsciiUpper(toBaseCodePoint(c)); + return CharUtils::isAsciiUpper(CharUtils::toBaseCodePoint(c)); } ////////////////////// diff --git a/native/jni/src/correction.h b/native/jni/src/correction.h index 75b49952c..3f60d48cf 100644 --- a/native/jni/src/correction.h +++ b/native/jni/src/correction.h @@ -21,6 +21,7 @@ #include "correction_state.h" #include "defines.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/layout/proximity_info_state.h" namespace latinime { @@ -342,13 +343,13 @@ AK_FORCE_INLINE static void calcEditDistanceOneStep(int *editDistanceTable, cons const int *const prevprev = outputLength >= 2 ? editDistanceTable + (outputLength - 2) * (inputSize + 1) : 0; current[0] = outputLength; - const int co = toBaseLowerCase(output[outputLength - 1]); - const int prevCO = outputLength >= 2 ? toBaseLowerCase(output[outputLength - 2]) : 0; + const int co = CharUtils::toBaseLowerCase(output[outputLength - 1]); + const int prevCO = outputLength >= 2 ? CharUtils::toBaseLowerCase(output[outputLength - 2]) : 0; for (int i = 1; i <= inputSize; ++i) { - const int ci = toBaseLowerCase(input[i - 1]); + const int ci = CharUtils::toBaseLowerCase(input[i - 1]); const int cost = (ci == co) ? 0 : 1; current[i] = min(current[i - 1] + 1, min(prev[i] + 1, prev[i - 1] + cost)); - if (i >= 2 && prevprev && ci == prevCO && co == toBaseLowerCase(input[i - 2])) { + if (i >= 2 && prevprev && ci == prevCO && co == CharUtils::toBaseLowerCase(input[i - 2])) { current[i] = min(current[i], prevprev[i - 2] + 1); } } diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 1510e3d5e..14bd2d57a 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -17,12 +17,12 @@ #ifndef LATINIME_DIC_NODE_H #define LATINIME_DIC_NODE_H -#include "char_utils.h" #include "defines.h" #include "suggest/core/dicnode/dic_node_state.h" #include "suggest/core/dicnode/dic_node_profiler.h" #include "suggest/core/dicnode/dic_node_properties.h" #include "suggest/core/dicnode/dic_node_release_listener.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/dictionary/digraph_utils.h" #if DEBUG_DICT @@ -221,7 +221,7 @@ class DicNode { bool isFirstCharUppercase() const { const int c = getOutputWordBuf()[0]; - return isAsciiUpper(c); + return CharUtils::isAsciiUpper(c); } bool isFirstWord() const { @@ -375,7 +375,7 @@ class DicNode { // Whether the current codepoint can be an intentional omission, in which case the traversal // algorithm will always check for a possible omission here. bool canBeIntentionalOmission() const { - return isIntentionalOmissionCodePoint(getNodeCodePoint()); + return CharUtils::isIntentionalOmissionCodePoint(getNodeCodePoint()); } // Whether the omission is so frequent that it should incur zero cost. diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index 7f0d0ed0e..c754a5ec2 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -21,6 +21,7 @@ #include "suggest/core/dicnode/dic_node_utils.h" #include "suggest/core/dicnode/dic_node_vector.h" #include "suggest/core/dictionary/binary_format.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/layout/proximity_info.h" #include "suggest/core/layout/proximity_info_state.h" @@ -62,9 +63,9 @@ namespace latinime { DicNodeVector *childDicNodes) { // Passing multiple chars node. No need to traverse child const int codePoint = dicNode->getNodeTypedCodePoint(); - const int baseLowerCaseCodePoint = toBaseLowerCase(codePoint); + const int baseLowerCaseCodePoint = CharUtils::toBaseLowerCase(codePoint); const bool isMatch = isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, codePoint); - if (isMatch || isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) { + if (isMatch || CharUtils::isIntentionalOmissionCodePoint(baseLowerCaseCodePoint)) { childDicNodes->pushPassingChild(dicNode); } } @@ -125,13 +126,13 @@ namespace latinime { return false; } if (pInfo && (pInfo->getKeyIndexOf(nodeCodePoint) == NOT_AN_INDEX - || isIntentionalOmissionCodePoint(nodeCodePoint))) { + || CharUtils::isIntentionalOmissionCodePoint(nodeCodePoint))) { // If normalized nodeCodePoint is not on the keyboard or skippable, this child is never // filtered. return false; } - const int lowerCodePoint = toLowerCase(nodeCodePoint); - const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint); + const int lowerCodePoint = CharUtils::toLowerCase(nodeCodePoint); + const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint); // TODO: Avoid linear search for (int i = 0; i < filterSize; ++i) { // Checking if a normalized code point is in filter characters when pInfo is not diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h index 65c2e9115..ef9fd3785 100644 --- a/native/jni/src/suggest/core/dictionary/binary_format.h +++ b/native/jni/src/suggest/core/dictionary/binary_format.h @@ -21,9 +21,9 @@ #include #include -#include "char_utils.h" #include "hash_map_compat.h" #include "suggest/core/dictionary/bloom_filter.h" +#include "suggest/core/dictionary/char_utils.h" namespace latinime { @@ -473,7 +473,8 @@ AK_FORCE_INLINE int BinaryFormat::getTerminalPosition(const uint8_t *const root, // there was no match (or we would have found it). if (wordPos >= length) return NOT_VALID_WORD; int charGroupCount = BinaryFormat::getGroupCountAndForwardPointer(root, &pos); - const int wChar = forceLowerCaseSearch ? toLowerCase(inWord[wordPos]) : inWord[wordPos]; + const int wChar = forceLowerCaseSearch + ? CharUtils::toLowerCase(inWord[wordPos]) : inWord[wordPos]; while (true) { // If there are no more character groups in this node, it means we could not // find a matching character for this depth, therefore there is no match. diff --git a/native/jni/src/char_utils.cpp b/native/jni/src/suggest/core/dictionary/char_utils.cpp similarity index 99% rename from native/jni/src/char_utils.cpp rename to native/jni/src/suggest/core/dictionary/char_utils.cpp index e219beb62..8d40e54c9 100644 --- a/native/jni/src/char_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/char_utils.cpp @@ -14,9 +14,10 @@ * limitations under the License. */ +#include "suggest/core/dictionary/char_utils.h" + #include -#include "char_utils.h" #include "defines.h" namespace latinime { @@ -36,8 +37,7 @@ struct LatinCapitalSmallPair { * $ apt-get install libicu-dev * * 3. Build the following code - * (You need this file, char_utils.h, and defines.h) - * $ g++ -o char_utils -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc + * $ g++ -o char_utils -I../../.. -DUPDATING_CHAR_UTILS char_utils.cpp -licuuc */ #ifdef UPDATING_CHAR_UTILS #include @@ -47,7 +47,7 @@ extern "C" int main() { for (unsigned short c = 0; c < 0xFFFF; c++) { if (c <= 0x7F) continue; const unsigned short icu4cLowerC = u_tolower(c); - const unsigned short myLowerC = latin_tolower(c); + const unsigned short myLowerC = CharUtils::latin_tolower(c); if (c != icu4cLowerC) { #ifdef CONFIRMING_CHAR_UTILS if (icu4cLowerC != myLowerC) { @@ -70,7 +70,8 @@ extern "C" int main() { * * 5. Update the SORTED_CHAR_MAP[] array below with the output above. * Then, rebuild with -DCONFIRMING_CHAR_UTILS and confirm the program exits successfully. - * $ g++ -o char_utils -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp -licuuc + * $ g++ -o char_utils -I../../.. -DUPDATING_CHAR_UTILS -DCONFIRMING_CHAR_UTILS char_utils.cpp \ + * -licuuc * $ ./char_utils * $ */ @@ -1054,7 +1055,7 @@ static int compare_pair_capital(const void *a, const void *b) { - static_cast((static_cast(b))->capital); } -unsigned short latin_tolower(const unsigned short c) { +/* static */ unsigned short CharUtils::latin_tolower(const unsigned short c) { struct LatinCapitalSmallPair *p = static_cast(bsearch(&c, SORTED_CHAR_MAP, NELEMS(SORTED_CHAR_MAP), sizeof(SORTED_CHAR_MAP[0]), compare_pair_capital)); @@ -1063,7 +1064,7 @@ unsigned short latin_tolower(const unsigned short c) { /* * Table mapping most combined Latin, Greek, and Cyrillic characters - * to their base characters. If c is in range, BASE_CHARS[c] == c + * to their base characters. If c is in range, CharUtils::BASE_CHARS[c] == c * if c is not a combined character, or the base character if it * is combined. * @@ -1074,7 +1075,7 @@ unsigned short latin_tolower(const unsigned short c) { * for ($j = $i; $j < $i + 8; $j++) { \ * printf("0x%04X, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' */ -const unsigned short BASE_CHARS[BASE_CHARS_SIZE] = { +/* static */ const unsigned short CharUtils::BASE_CHARS[CharUtils::BASE_CHARS_SIZE] = { /* U+0000 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, /* U+0008 */ 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, /* U+0010 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, diff --git a/native/jni/src/suggest/core/dictionary/char_utils.h b/native/jni/src/suggest/core/dictionary/char_utils.h new file mode 100644 index 000000000..2e735a81c --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/char_utils.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_CHAR_UTILS_H +#define LATINIME_CHAR_UTILS_H + +#include + +#include "defines.h" + +namespace latinime { + +class CharUtils { + public: + static AK_FORCE_INLINE bool isAsciiUpper(int c) { + // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to + // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...). + return (c >= 'A' && c <= 'Z'); + } + + static AK_FORCE_INLINE int toAsciiLower(int c) { + return c - 'A' + 'a'; + } + + static AK_FORCE_INLINE bool isAscii(int c) { + return isascii(c) != 0; + } + + static AK_FORCE_INLINE int toLowerCase(const int c) { + if (isAsciiUpper(c)) { + return toAsciiLower(c); + } + if (isAscii(c)) { + return c; + } + return static_cast(latin_tolower(static_cast(c))); + } + + static AK_FORCE_INLINE int toBaseLowerCase(const int c) { + return toLowerCase(toBaseCodePoint(c)); + } + + static AK_FORCE_INLINE bool isIntentionalOmissionCodePoint(const int codePoint) { + // TODO: Do not hardcode here + return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS; + } + + static AK_FORCE_INLINE int getCodePointCount(const int arraySize, const int *const codePoints) { + int size = 0; + for (; size < arraySize; ++size) { + if (codePoints[size] == '\0') { + break; + } + } + return size; + } + + static AK_FORCE_INLINE int toBaseCodePoint(int c) { + if (c < BASE_CHARS_SIZE) { + return static_cast(BASE_CHARS[c]); + } + return c; + } + + static unsigned short latin_tolower(const unsigned short c); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(CharUtils); + + /** + * Table mapping most combined Latin, Greek, and Cyrillic characters + * to their base characters. If c is in range, BASE_CHARS[c] == c + * if c is not a combined character, or the base character if it + * is combined. + */ + static const int BASE_CHARS_SIZE = 0x0500; + static const unsigned short BASE_CHARS[BASE_CHARS_SIZE]; +}; +} // namespace latinime +#endif // LATINIME_CHAR_UTILS_H diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp index 7a0f755e5..e8cdd5352 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp @@ -16,9 +16,9 @@ #include "suggest/core/dictionary/digraph_utils.h" -#include "char_utils.h" #include "defines.h" #include "suggest/core/dictionary/binary_format.h" +#include "suggest/core/dictionary/char_utils.h" namespace latinime { @@ -122,7 +122,7 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = /* static */ const DigraphUtils::digraph_t *DigraphUtils::getDigraphForDigraphTypeAndCodePoint( const DigraphUtils::DigraphType digraphType, const int compositeGlyphCodePoint) { const DigraphUtils::digraph_t *digraphs = 0; - const int compositeGlyphLowerCodePoint = toLowerCase(compositeGlyphCodePoint); + const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint); const int digraphsSize = DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs); for (int i = 0; i < digraphsSize; i++) { diff --git a/native/jni/src/suggest/core/layout/proximity_info.cpp b/native/jni/src/suggest/core/layout/proximity_info.cpp index 6dd88051c..0b5d71a43 100644 --- a/native/jni/src/suggest/core/layout/proximity_info.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info.cpp @@ -21,9 +21,9 @@ #include #include -#include "char_utils.h" #include "defines.h" #include "jni.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/layout/additional_proximity_chars.h" #include "suggest/core/layout/geometry_utils.h" #include "suggest/core/layout/proximity_info_params.h" @@ -165,7 +165,7 @@ void ProximityInfo::initializeG() { // TODO: Optimize for (int i = 0; i < KEY_COUNT; ++i) { const int code = mKeyCodePoints[i]; - const int lowerCode = toLowerCase(code); + const int lowerCode = CharUtils::toLowerCase(code); mCenterXsG[i] = mKeyXCoordinates[i] + mKeyWidths[i] / 2; mCenterYsG[i] = mKeyYCoordinates[i] + mKeyHeights[i] / 2; mCodeToKeyMap[lowerCode] = i; diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.cpp b/native/jni/src/suggest/core/layout/proximity_info_state.cpp index 2bd3ceb7e..412d5508b 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.cpp +++ b/native/jni/src/suggest/core/layout/proximity_info_state.cpp @@ -23,6 +23,7 @@ #include #include "defines.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/layout/geometry_utils.h" #include "suggest/core/layout/proximity_info.h" #include "suggest/core/layout/proximity_info_state_utils.h" @@ -175,7 +176,7 @@ float ProximityInfoState::getPointToKeyLength( const int index = inputIndex * mProximityInfo->getKeyCount() + keyId; return min(mSampledNormalizedSquaredLengthCache[index], mMaxPointToKeyLength); } - if (isIntentionalOmissionCodePoint(codePoint)) { + if (CharUtils::isIntentionalOmissionCodePoint(codePoint)) { return 0.0f; } // If the char is not a key on the keyboard then return the max length. @@ -203,7 +204,7 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co const bool checkProximityChars, int *proximityIndex) const { const int *currentCodePoints = getProximityCodePointsAt(index); const int firstCodePoint = currentCodePoints[0]; - const int baseLowerC = toBaseLowerCase(codePoint); + const int baseLowerC = CharUtils::toBaseLowerCase(codePoint); // The first char in the array is what user typed. If it matches right away, that means the // user typed that same char for this pos. @@ -215,7 +216,7 @@ ProximityType ProximityInfoState::getProximityType(const int index, const int co // If the non-accented, lowercased version of that first character matches c, then we have a // non-accented version of the accented character the user typed. Treat it as a close char. - if (toBaseLowerCase(firstCodePoint) == baseLowerC) { + if (CharUtils::toBaseLowerCase(firstCodePoint) == baseLowerC) { return PROXIMITY_CHAR; } @@ -257,8 +258,8 @@ ProximityType ProximityInfoState::getProximityTypeG(const int index, const int c if (!isUsed()) { return UNRELATED_CHAR; } - const int lowerCodePoint = toLowerCase(codePoint); - const int baseLowerCodePoint = toBaseCodePoint(lowerCodePoint); + const int lowerCodePoint = CharUtils::toLowerCase(codePoint); + const int baseLowerCodePoint = CharUtils::toBaseCodePoint(lowerCodePoint); for (int i = 0; i < static_cast(mSampledSearchKeyVectors[index].size()); ++i) { if (mSampledSearchKeyVectors[index][i] == lowerCodePoint || mSampledSearchKeyVectors[index][i] == baseLowerCodePoint) { diff --git a/native/jni/src/suggest/core/layout/proximity_info_state.h b/native/jni/src/suggest/core/layout/proximity_info_state.h index fd09307fe..a971294e3 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_state.h +++ b/native/jni/src/suggest/core/layout/proximity_info_state.h @@ -20,7 +20,6 @@ #include // for memset() #include -#include "char_utils.h" #include "defines.h" #include "hash_map_compat.h" #include "suggest/core/layout/proximity_info_params.h" diff --git a/native/jni/src/suggest/core/layout/proximity_info_utils.h b/native/jni/src/suggest/core/layout/proximity_info_utils.h index c3a275b3c..3588f4df8 100644 --- a/native/jni/src/suggest/core/layout/proximity_info_utils.h +++ b/native/jni/src/suggest/core/layout/proximity_info_utils.h @@ -19,9 +19,9 @@ #include -#include "char_utils.h" #include "defines.h" #include "hash_map_compat.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/layout/additional_proximity_chars.h" #include "suggest/core/layout/geometry_utils.h" @@ -37,7 +37,7 @@ class ProximityInfoUtils { if (c == NOT_A_CODE_POINT) { return NOT_AN_INDEX; } - const int lowerCode = toLowerCase(c); + const int lowerCode = CharUtils::toLowerCase(c); hash_map_compat::const_iterator mapPos = codeToKeyMap->find(lowerCode); if (mapPos != codeToKeyMap->end()) { return mapPos->second; diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp index d01531f07..7ad568e25 100644 --- a/native/jni/src/suggest/core/policy/weighting.cpp +++ b/native/jni/src/suggest/core/policy/weighting.cpp @@ -16,7 +16,6 @@ #include "suggest/core/policy/weighting.h" -#include "char_utils.h" #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_profiler.h" diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 94441877a..720222363 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -16,7 +16,6 @@ #include "suggest/core/suggest.h" -#include "char_utils.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_priority_queue.h" #include "suggest/core/dicnode/dic_node_vector.h" diff --git a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h index b212fe101..e0664185c 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_traversal.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_traversal.h @@ -19,10 +19,10 @@ #include -#include "char_utils.h" #include "defines.h" #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/layout/proximity_info_state.h" #include "suggest/core/policy/traversal.h" #include "suggest/core/session/dic_traverse_session.h" @@ -64,9 +64,9 @@ class TypingTraversal : public Traversal { } const int point0Index = dicNode->getInputIndex(0); const int currentBaseLowerCodePoint = - toBaseLowerCase(childDicNode->getNodeCodePoint()); + CharUtils::toBaseLowerCase(childDicNode->getNodeCodePoint()); const int typedBaseLowerCodePoint = - toBaseLowerCase(traverseSession->getProximityInfoState(0) + CharUtils::toBaseLowerCase(traverseSession->getProximityInfoState(0) ->getPrimaryCodePointAt(point0Index)); return (currentBaseLowerCodePoint != typedBaseLowerCodePoint); } @@ -172,7 +172,7 @@ class TypingTraversal : public Traversal { } const int c = dicNode->getOutputWordBuf()[0]; const bool shortCappedWord = dicNode->getDepth() - < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && isAsciiUpper(c); + < ScoringParams::THRESHOLD_SHORT_WORD_LENGTH && CharUtils::isAsciiUpper(c); return !shortCappedWord || probability >= ScoringParams::THRESHOLD_NEXT_WORD_PROBABILITY_FOR_CAPPED; } diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h index cb6abd574..9e0c0d2f7 100644 --- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.h +++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.h @@ -19,6 +19,7 @@ #include "defines.h" #include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/layout/touch_position_correction_utils.h" #include "suggest/core/policy/weighting.h" #include "suggest/core/session/dic_traverse_session.h" @@ -98,9 +99,9 @@ class TypingWeighting : public Weighting { bool isProximityDicNode(const DicTraverseSession *const traverseSession, const DicNode *const dicNode) const { const int pointIndex = dicNode->getInputIndex(0); - const int primaryCodePoint = toBaseLowerCase( + const int primaryCodePoint = CharUtils::toBaseLowerCase( traverseSession->getProximityInfoState(0)->getPrimaryCodePointAt(pointIndex)); - const int dicNodeChar = toBaseLowerCase(dicNode->getNodeCodePoint()); + const int dicNodeChar = CharUtils::toBaseLowerCase(dicNode->getNodeCodePoint()); return primaryCodePoint != dicNodeChar; } diff --git a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h index ec1457455..09f986adf 100644 --- a/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h +++ b/native/jni/src/suggest/policyimpl/utils/damerau_levenshtein_edit_distance_policy.h @@ -17,7 +17,7 @@ #ifndef LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H #define LATINIME_DAEMARU_LEVENSHTEIN_EDIT_DISTANCE_POLICY_H -#include "char_utils.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/policyimpl/utils/edit_distance_policy.h" namespace latinime { @@ -31,8 +31,8 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy { ~DamerauLevenshteinEditDistancePolicy() {} AK_FORCE_INLINE float getSubstitutionCost(const int index0, const int index1) const { - const int c0 = toBaseLowerCase(mString0[index0]); - const int c1 = toBaseLowerCase(mString1[index1]); + const int c0 = CharUtils::toBaseLowerCase(mString0[index0]); + const int c1 = CharUtils::toBaseLowerCase(mString1[index1]); return (c0 == c1) ? 0.0f : 1.0f; } @@ -45,10 +45,10 @@ class DamerauLevenshteinEditDistancePolicy : public EditDistancePolicy { } AK_FORCE_INLINE bool allowTransposition(const int index0, const int index1) const { - const int c0 = toBaseLowerCase(mString0[index0]); - const int c1 = toBaseLowerCase(mString1[index1]); - if (index0 > 0 && index1 > 0 && c0 == toBaseLowerCase(mString1[index1 - 1]) - && c1 == toBaseLowerCase(mString0[index0 - 1])) { + const int c0 = CharUtils::toBaseLowerCase(mString0[index0]); + const int c1 = CharUtils::toBaseLowerCase(mString1[index1]); + if (index0 > 0 && index1 > 0 && c0 == CharUtils::toBaseLowerCase(mString1[index1 - 1]) + && c1 == CharUtils::toBaseLowerCase(mString0[index0 - 1])) { return true; } return false; diff --git a/native/jni/src/unigram_dictionary.cpp b/native/jni/src/unigram_dictionary.cpp index 1133256c4..66a8b8542 100644 --- a/native/jni/src/unigram_dictionary.cpp +++ b/native/jni/src/unigram_dictionary.cpp @@ -18,9 +18,9 @@ #define LOG_TAG "LatinIME: unigram_dictionary.cpp" -#include "char_utils.h" #include "defines.h" #include "suggest/core/dictionary/binary_format.h" +#include "suggest/core/dictionary/char_utils.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/terminal_attributes.h" @@ -696,8 +696,8 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, const bool hasMultipleChars = (0 != (BinaryFormat::FLAG_HAS_MULTIPLE_CHARS & flags)); int pos = startPos; int codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); - int baseChar = toBaseLowerCase(codePoint); - const int wChar = toBaseLowerCase(inWord[startInputIndex]); + int baseChar = CharUtils::toBaseLowerCase(codePoint); + const int wChar = CharUtils::toBaseLowerCase(inWord[startInputIndex]); if (baseChar != wChar) { *outPos = hasMultipleChars ? BinaryFormat::skipOtherCharacters(root, pos) : pos; @@ -709,8 +709,9 @@ static inline bool testCharGroupForContinuedLikeness(const uint8_t flags, if (hasMultipleChars) { codePoint = BinaryFormat::getCodePointAndForwardPointer(root, &pos); while (NOT_A_CODE_POINT != codePoint) { - baseChar = toBaseLowerCase(codePoint); - if (inputIndex + 1 >= inputSize || toBaseLowerCase(inWord[++inputIndex]) != baseChar) { + baseChar = CharUtils::toBaseLowerCase(codePoint); + if (inputIndex + 1 >= inputSize + || CharUtils::toBaseLowerCase(inWord[++inputIndex]) != baseChar) { *outPos = BinaryFormat::skipOtherCharacters(root, pos); *outInputIndex = startInputIndex; return false;