// Patch summary: moves ownership of the typed-input code buffer from
// UnigramDictionary into ProximityInfo.
//
// native/src/proximity_info.{cpp,h}
//   * setInputParams(inputCodes, inputLength): stores the pointer and the
//     length in the new members mInputCodes / mInputLength. Only the pointer
//     is stored; nothing is copied.
//     NOTE(review): the codes buffer presumably has to stay alive for as long
//     as getProximityCharsAt()/sameAsTyped() are used -- confirm with callers.
//   * getProximityCharsAt(index): returns
//     mInputCodes + index * MAX_PROXIMITY_CHARS_SIZE. No bounds check is done.
//   * sameAsTyped(word, length): moved here from UnigramDictionary. Returns
//     false when length != mInputLength; otherwise compares each word[i] with
//     the first code of the i-th MAX_PROXIMITY_CHARS_SIZE-wide slot and
//     returns true only if all of them match.
//   NOTE(review): no hunk here shows the constructor initialising
//   mInputCodes / mInputLength -- verify setInputParams() always runs first.
//
// native/src/unigram_dictionary.{cpp,h}
//   * The member `const int *mInputCodes` is replaced by
//     `const ProximityInfo *mProximityInfo`; getInputCharsAt() now forwards to
//     mProximityInfo->getProximityCharsAt(index) and onTerminal() calls
//     mProximityInfo->sameAsTyped().
//   * initSuggestions() gains proximityInfo / xcoordinates / ycoordinates
//     parameters, calls proximityInfo->setInputParams(codes, codesSize) and
//     keeps the pointer. The two coordinate parameters are not read in the
//     body shown in this patch.
//   * getSuggestions(), getWordSuggestions() and
//     getWordWithDigraphSuggestionsRec() now take a non-const ProximityInfo*,
//     which is required because setInputParams() is a non-const member.
//   * BYTES_IN_ONE_CHAR uses sizeof(int) instead of sizeof(*mInputCodes),
//     since the mInputCodes member (a const int*) no longer exists here.
diff --git a/native/src/proximity_info.cpp b/native/src/proximity_info.cpp index 209c31e6e..ebf83a7e7 100644 --- a/native/src/proximity_info.cpp +++ b/native/src/proximity_info.cpp @@ -63,4 +63,29 @@ bool ProximityInfo::hasSpaceProximity(const int x, const int y) const { return false; } +// TODO: Calculate nearby codes here. +void ProximityInfo::setInputParams(const int* inputCodes, const int inputLength) { + mInputCodes = inputCodes; + mInputLength = inputLength; +} + +const int* ProximityInfo::getProximityCharsAt(const int index) const { + return mInputCodes + (index * MAX_PROXIMITY_CHARS_SIZE); +} + +bool ProximityInfo::sameAsTyped(const unsigned short *word, int length) const { + if (length != mInputLength) { + return false; + } + const int *inputCodes = mInputCodes; + while (length--) { + if ((unsigned int) *inputCodes != (unsigned int) *word) { + return false; + } + inputCodes += MAX_PROXIMITY_CHARS_SIZE; + word++; + } + return true; +} + } // namespace latinime diff --git a/native/src/proximity_info.h b/native/src/proximity_info.h index 327cd0940..0d7c9c56e 100644 --- a/native/src/proximity_info.h +++ b/native/src/proximity_info.h @@ -30,6 +30,9 @@ public: const uint32_t *proximityCharsArray); ~ProximityInfo(); bool hasSpaceProximity(const int x, const int y) const; + void setInputParams(const int* inputCodes, const int inputLength); + const int* getProximityCharsAt(const int index) const; + bool sameAsTyped(const unsigned short *word, int length) const; private: int getStartIndexFromCoordinates(const int x, const int y) const; const int MAX_PROXIMITY_CHARS_SIZE; @@ -39,7 +42,9 @@ private: const int GRID_HEIGHT; const int CELL_WIDTH; const int CELL_HEIGHT; + const int *mInputCodes; uint32_t *mProximityCharsArray; + int mInputLength; }; } // namespace latinime diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index 698584e54..5873e2110 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ 
-54,7 +54,7 @@ UnigramDictionary::UnigramDictionary(const uint8_t* const streamStart, int typed // TODO : remove this variable. ROOT_POS(0), #endif // NEW_DICTIONARY_FORMAT - BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(*mInputCodes)), + BYTES_IN_ONE_CHAR(MAX_PROXIMITY_CHARS * sizeof(int)), MAX_UMLAUT_SEARCH_DEPTH(DEFAULT_MAX_UMLAUT_SEARCH_DEPTH) { if (DEBUG_DICT) { LOGI("UnigramDictionary - constructor"); @@ -93,7 +93,7 @@ bool UnigramDictionary::isDigraph(const int* codes, const int i, const int codes // codesDest is the current point in the work buffer. // codesSrc is the current point in the user-input, original, content-unmodified buffer. // codesRemain is the remaining size in codesSrc. -void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo, +void UnigramDictionary::getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* codesDest, unsigned short* outWords, int* frequencies) { @@ -143,7 +143,7 @@ void UnigramDictionary::getWordWithDigraphSuggestionsRec(const ProximityInfo *pr (codesDest - codesBuffer) / MAX_PROXIMITY_CHARS + codesRemain, outWords, frequencies); } -int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates, +int UnigramDictionary::getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies) { @@ -187,13 +187,14 @@ int UnigramDictionary::getSuggestions(const ProximityInfo *proximityInfo, const return suggestedWordsCount; } -void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo, +void UnigramDictionary::getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, 
const int codesSize, unsigned short *outWords, int *frequencies) { PROF_OPEN; PROF_START(0); - initSuggestions(codes, codesSize, outWords, frequencies); + initSuggestions( + proximityInfo, xcoordinates, ycoordinates, codes, codesSize, outWords, frequencies); if (DEBUG_DICT) assert(codesSize == mInputLength); const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); @@ -275,16 +276,18 @@ void UnigramDictionary::getWordSuggestions(const ProximityInfo *proximityInfo, PROF_END(6); } -void UnigramDictionary::initSuggestions(const int *codes, const int codesSize, +void UnigramDictionary::initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies) { if (DEBUG_DICT) { LOGI("initSuggest"); } mFrequencies = frequencies; mOutputChars = outWords; - mInputCodes = codes; mInputLength = codesSize; mMaxEditDistance = mInputLength < 5 ? 2 : mInputLength / 2; + proximityInfo->setInputParams(codes, codesSize); + mProximityInfo = proximityInfo; } static inline void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize) { @@ -360,21 +363,6 @@ static inline unsigned short toBaseLowerCase(unsigned short c) { return c; } -bool UnigramDictionary::sameAsTyped(const unsigned short *word, int length) const { - if (length != mInputLength) { - return false; - } - const int *inputCodes = mInputCodes; - while (length--) { - if ((unsigned int) *inputCodes != (unsigned int) *word) { - return false; - } - inputCodes += MAX_PROXIMITY_CHARS; - word++; - } - return true; -} - static const char QUOTE = '\''; static const char SPACE = ' '; @@ -569,6 +557,8 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int if (excessivePos >= 0) { multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE, &finalFreq); if (!existsAdjacentProximityChars(inputIndex, mInputLength)) { + // If an excessive character is not adjacent 
to the left char or the right char, + // we will demote this word. multiplyRate(WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE, &finalFreq); } } @@ -678,7 +668,7 @@ inline void UnigramDictionary::onTerminal(unsigned short int* word, const int de const int excessivePos, const int transposedPos, const int freq, const bool sameLength, int* nextLetters, const int nextLettersSize) { - const bool isSameAsTyped = sameLength ? sameAsTyped(word, depth + 1) : false; + const bool isSameAsTyped = sameLength ? mProximityInfo->sameAsTyped(word, depth + 1) : false; if (isSameAsTyped) return; if (depth >= MIN_SUGGEST_DEPTH) { diff --git a/native/src/unigram_dictionary.h b/native/src/unigram_dictionary.h index dcc8f2a9a..70702a95f 100644 --- a/native/src/unigram_dictionary.h +++ b/native/src/unigram_dictionary.h @@ -82,26 +82,26 @@ public: int maxAlternatives); #endif // NEW_DICTIONARY_FORMAT int getBigramPosition(int pos, unsigned short *word, int offset, int length) const; - int getSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates, + int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, const int flags, unsigned short *outWords, int *frequencies); ~UnigramDictionary(); private: - void getWordSuggestions(const ProximityInfo *proximityInfo, const int *xcoordinates, + void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, const int *ycoordinates, const int *codes, const int codesSize, unsigned short *outWords, int *frequencies); bool isDigraph(const int* codes, const int i, const int codesSize) const; - void getWordWithDigraphSuggestionsRec(const ProximityInfo *proximityInfo, + void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, const int* ycoordinates, const int *codesBuffer, const int codesBufferSize, const int flags, const int* codesSrc, const int codesRemain, const int currentDepth, int* 
codesDest, unsigned short* outWords, int* frequencies); - void initSuggestions(const int *codes, const int codesSize, unsigned short *outWords, - int *frequencies); + void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, + const int *ycoordinates, const int *codes, const int codesSize, + unsigned short *outWords, int *frequencies); void getSuggestionCandidates(const int skipPos, const int excessivePos, const int transposedPos, int *nextLetters, const int nextLettersSize, const int maxDepth); - bool sameAsTyped(const unsigned short *word, int length) const; bool addWord(unsigned short *word, int length, int frequency); bool getSplitTwoWordsSuggestion(const int inputLength, const int firstWordStartPos, const int firstWordLength, @@ -129,7 +129,7 @@ private: int *newDiffs, int *nextSiblingPosition, int *nextOutputIndex); bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const; inline const int* getInputCharsAt(const int index) const { - return mInputCodes + (index * MAX_PROXIMITY_CHARS); + return mProximityInfo->getProximityCharsAt(index); } int getMostFrequentWordLike(const int startInputIndex, const int inputLength, unsigned short *word); @@ -174,7 +174,7 @@ private: int *mFrequencies; unsigned short *mOutputChars; - const int *mInputCodes; + const ProximityInfo *mProximityInfo; int mInputLength; // MAX_WORD_LENGTH_INTERNAL must be bigger than MAX_WORD_LENGTH unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];