// Patch summary (commentary only; everything from "diff --git" onwards is unchanged).
//
// Files touched: native/src/binary_format.h and native/src/unigram_dictionary.cpp.
//
// Hunk 1 - BinaryFormat::getGroupCountAndForwardPointer(dict, pos):
//   Before: returned the single byte at dict[*pos] and advanced *pos by one.
//   After:  reads one byte and advances *pos. If that byte is below 0x80 it is
//           returned as-is. Otherwise its low 7 bits become the high-order part
//           of the result, a second byte is read (advancing *pos again) and
//           OR-ed in as the low 8 bits, giving a value of up to 15 bits.
//   The function takes no length argument, so no bounds check on dict is
//   visible here.
//
// Hunk 2 - UnigramDictionary::getMostFrequentWordLikeInner:
//   Before: took the root child count from root[0] and hard-coded the first
//           sibling position as 1.
//   After:  calls getGroupCountAndForwardPointer(root, &startPos) with
//           startPos initialised to 0, stores the result in mStackChildCount[0]
//           and stores the advanced startPos in mStackSiblingPos[0], so the
//           start position follows however many bytes the count occupied.
//
// NOTE(review): the patch text below is on a single line; the line breaks a
// unified diff normally has appear to have been lost - presumably they must be
// restored from the original change before it can be applied; confirm.
diff --git a/native/src/binary_format.h b/native/src/binary_format.h index 9944fa2bd..1d74998f6 100644 --- a/native/src/binary_format.h +++ b/native/src/binary_format.h @@ -61,7 +61,9 @@ inline int BinaryFormat::detectFormat(const uint8_t* const dict) { } inline int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t* const dict, int* pos) { - return dict[(*pos)++]; + const int msb = dict[(*pos)++]; + if (msb < 0x80) return msb; + return ((msb & 0x7F) << 8) | dict[(*pos)++]; } inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t* const dict, int* pos) { diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index cd73fe3f8..ca7f0be0c 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -507,9 +507,10 @@ int UnigramDictionary::getMostFrequentWordLikeInner(const uint16_t * const inWor int maxFreq = -1; const uint8_t* const root = DICT_ROOT; - mStackChildCount[0] = root[0]; + int startPos = 0; + mStackChildCount[0] = BinaryFormat::getGroupCountAndForwardPointer(root, &startPos); mStackInputIndex[0] = 0; - mStackSiblingPos[0] = 1; + mStackSiblingPos[0] = startPos; while (depth >= 0) { const int charGroupCount = mStackChildCount[depth]; int pos = mStackSiblingPos[depth];