diff --git a/native/jni/src/bigram_dictionary.cpp b/native/jni/src/bigram_dictionary.cpp
index e05e9d688..67f96281d 100644
--- a/native/jni/src/bigram_dictionary.cpp
+++ b/native/jni/src/bigram_dictionary.cpp
@@ -135,6 +135,7 @@ int BigramDictionary::getBigrams(const int32_t *prevWord, int prevWordLength, in
 // If the word is not found or has no bigrams, this function returns 0.
 int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
         const int prevWordLength) {
+    if (!prevWord || 0 >= prevWordLength) return 0;
     const uint8_t* const root = DICT;
     int pos = BinaryFormat::getTerminalPosition(root, prevWord, prevWordLength);
 
@@ -152,6 +153,26 @@ int BigramDictionary::getBigramListPositionForWord(const int32_t *prevWord,
     return pos;
 }
 
+// Records each bigram attached to prevWord in *map: the key is the address returned by
+// getAttributeAddressAndForwardPointer() and the value is the frequency bits masked out of
+// the entry's flags. Entries are read until one lacks FLAG_ATTRIBUTE_HAS_NEXT. *map is left
+// untouched when getBigramListPositionForWord() returns 0 for prevWord.
+void BigramDictionary::fillBigramAddressToFrequencyMap(const int32_t *prevWord,
+        const int prevWordLength, std::map<int, int> *map) {
+    const uint8_t* const root = DICT;
+    int pos = getBigramListPositionForWord(prevWord, prevWordLength);
+    if (0 == pos) return;
+
+    int bigramFlags;
+    do {
+        bigramFlags = BinaryFormat::getFlagsAndForwardPointer(root, &pos);
+        const int frequency = UnigramDictionary::MASK_ATTRIBUTE_FREQUENCY & bigramFlags;
+        const int bigramPos = BinaryFormat::getAttributeAddressAndForwardPointer(root, bigramFlags,
+                &pos);
+        (*map)[bigramPos] = frequency;
+    } while (0 != (UnigramDictionary::FLAG_ATTRIBUTE_HAS_NEXT & bigramFlags));
+}
+
 bool BigramDictionary::checkFirstCharacter(unsigned short *word) {
     // Checks whether this word starts with same character or neighboring characters of
     // what user typed.
diff --git a/native/jni/src/bigram_dictionary.h b/native/jni/src/bigram_dictionary.h
index 76f903958..b1233215b 100644
--- a/native/jni/src/bigram_dictionary.h
+++ b/native/jni/src/bigram_dictionary.h
@@ -17,6 +17,7 @@
 #ifndef LATINIME_BIGRAM_DICTIONARY_H
 #define LATINIME_BIGRAM_DICTIONARY_H
 
+#include <map>
 #include <stdint.h>
 
 namespace latinime {
@@ -28,6 +29,8 @@ class BigramDictionary {
     int getBigrams(const int32_t *word, int length, int *codes, int codesSize,
             unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams);
     int getBigramListPositionForWord(const int32_t *prevWord, const int prevWordLength);
+    void fillBigramAddressToFrequencyMap(const int32_t *prevWord, const int prevWordLength,
+            std::map<int, int> *map);
     ~BigramDictionary();
  private:
     bool addWordBigram(unsigned short *word, int length, int frequency);
diff --git a/native/jni/src/debug.h b/native/jni/src/debug.h
index b13052c95..376ba59d9 100644
--- a/native/jni/src/debug.h
+++ b/native/jni/src/debug.h
@@ -22,7 +22,7 @@
 
 static inline unsigned char* convertToUnibyteString(unsigned short* input, unsigned char* output,
         const unsigned int length) {
-    int i = 0;
+    unsigned int i = 0;
     for (; i <= length && input[i] != 0; ++i)
         output[i] = input[i] & 0xFF;
     output[i] = 0;
@@ -31,10 +31,10 @@ static inline unsigned char* convertToUnibyteString(unsigned short* input, unsig
 
 static inline unsigned char* convertToUnibyteStringAndReplaceLastChar(unsigned short* input,
         unsigned char* output, const unsigned int length, unsigned char c) {
-    int i = 0;
+    unsigned int i = 0;
     for (; i <= length && input[i] != 0; ++i)
         output[i] = input[i] & 0xFF;
-    output[i-1] = c;
+    if (i > 0) output[i-1] = c;
     output[i] = 0;
     return output;
 }
diff --git a/native/jni/src/dictionary.h b/native/jni/src/dictionary.h
index e0feeafda..a2b0491c5 100644
--- a/native/jni/src/dictionary.h
+++ b/native/jni/src/dictionary.h
@@ -17,6 +17,8 @@
 #ifndef LATINIME_DICTIONARY_H
 #define LATINIME_DICTIONARY_H
 
+#include <map>
+
 #include "bigram_dictionary.h"
 #include "char_utils.h"
 #include "correction.h"
@@ -39,6 +41,13 @@ class Dictionary {
         // If none, it's zero.
         const int bigramListPosition = !prevWordChars ? 0
                 : mBigramDictionary->getBigramListPositionForWord(prevWordChars, prevWordLength);
+        std::map<int, int> bigramMap;
+        // Skip the lookup when there is no previous word, like bigramListPosition above.
+        // NOTE(review): bigramMap is filled here but is not passed to getSuggestions() below.
+        if (prevWordChars) {
+            mBigramDictionary->fillBigramAddressToFrequencyMap(prevWordChars, prevWordLength,
+                    &bigramMap);
+        }
         return mUnigramDictionary->getSuggestions(proximityInfo, mWordsPriorityQueuePool,
                 mCorrection, xcoordinates, ycoordinates, codes, codesSize,
                 bigramListPosition, useFullEditDistance, outWords, frequencies);