From bd0d1afdb28a28e2ddac1409208c59ba64350399 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroynagi Date: Tue, 11 Jun 2013 12:01:49 +0900 Subject: [PATCH] Introduce BinaryDictionaryHeader to access binary dictionary header. BinaryDictionaryHeader abstracts header structure and access header information via only its instance. Bug: 6669677 Change-Id: Ib5ab0e8fef12265ddabc1d0165548b69953bba6a --- native/jni/Android.mk | 4 +- ...oid_inputmethod_latin_BinaryDictionary.cpp | 2 +- ...cpp => binary_dictionary_format_utils.cpp} | 6 +- ...mat.h => binary_dictionary_format_utils.h} | 23 +-- .../dictionary/binary_dictionary_header.cpp | 49 ++++++ .../dictionary/binary_dictionary_header.h | 70 ++++++++ ...binary_dictionary_header_reading_utils.cpp | 121 ++++++++++++++ .../binary_dictionary_header_reading_utils.h | 102 ++++++++++++ .../core/dictionary/binary_dictionary_info.h | 18 +- .../suggest/core/dictionary/binary_format.h | 156 ------------------ .../core/dictionary/byte_array_utils.h | 39 ++++- .../suggest/core/dictionary/dictionary.cpp | 11 +- .../src/suggest/core/dictionary/dictionary.h | 6 +- .../suggest/core/dictionary/digraph_utils.cpp | 24 +-- .../suggest/core/dictionary/digraph_utils.h | 11 +- .../core/session/dic_traverse_session.cpp | 10 +- .../core/session/dic_traverse_session.h | 1 - native/jni/src/suggest/core/suggest.cpp | 4 +- 18 files changed, 424 insertions(+), 233 deletions(-) rename native/jni/src/suggest/core/dictionary/{binary_dictionary_format.cpp => binary_dictionary_format_utils.cpp} (90%) rename native/jni/src/suggest/core/dictionary/{binary_dictionary_format.h => binary_dictionary_format_utils.h} (67%) create mode 100644 native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp create mode 100644 native/jni/src/suggest/core/dictionary/binary_dictionary_header.h create mode 100644 native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp create mode 100644 
native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index 7ca405752..9718cf5fb 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -55,7 +55,9 @@ LATIN_IME_CORE_SRC_FILES := \ dic_nodes_cache.cpp) \ $(addprefix suggest/core/dictionary/, \ bigram_dictionary.cpp \ - binary_dictionary_format.cpp \ + binary_dictionary_format_utils.cpp \ + binary_dictionary_header.cpp \ + binary_dictionary_header_reading_utils.cpp \ byte_array_utils.cpp \ dictionary.cpp \ digraph_utils.cpp) \ diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index f60793733..8490e32bc 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -27,7 +27,7 @@ #include "jni.h" #include "jni_common.h" #include "obsolete/correction.h" -#include "suggest/core/dictionary/binary_dictionary_format.h" +#include "suggest/core/dictionary/binary_dictionary_format_utils.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/suggest_options.h" diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp similarity index 90% rename from native/jni/src/suggest/core/dictionary/binary_dictionary_format.cpp rename to native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp index 50e0211d7..737df63c7 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -#include "suggest/core/dictionary/binary_dictionary_format.h" +#include "suggest/core/dictionary/binary_dictionary_format_utils.h" namespace latinime { @@ -31,7 +31,6 @@ const int BinaryDictionaryFormat::DICTIONARY_MINIMUM_SIZE = 4; // then options that must be 0. Hence the first 32-bits of the format are always as follow // and it's okay to consider them a magic number as a whole. const uint32_t BinaryDictionaryFormat::FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100; -const int BinaryDictionaryFormat::FORMAT_VERSION_1_HEADER_SIZE = 5; // The versions of Latin IME that only handle format version 1 only test for the magic // number, so we had to change it so that version 2 files would be rejected by older @@ -39,9 +38,6 @@ const int BinaryDictionaryFormat::FORMAT_VERSION_1_HEADER_SIZE = 5; const uint32_t BinaryDictionaryFormat::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 const int BinaryDictionaryFormat::FORMAT_VERSION_2_MINIMUM_SIZE = 12; -const int BinaryDictionaryFormat::VERSION_2_MAGIC_NUMBER_SIZE = 4; -const int BinaryDictionaryFormat::VERSION_2_DICTIONARY_VERSION_SIZE = 2; -const int BinaryDictionaryFormat::VERSION_2_DICTIONARY_FLAG_SIZE = 2; /* static */ BinaryDictionaryFormat::FORMAT_VERSION BinaryDictionaryFormat::detectFormatVersion( const uint8_t *const dict, const int dictSize) { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h similarity index 67% rename from native/jni/src/suggest/core/dictionary/binary_dictionary_format.h rename to native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h index 3aa1662da..c0fd56111 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_H -#define LATINIME_BINARY_DICTIONARY_FORMAT_H +#ifndef LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H +#define LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H #include @@ -42,30 +42,13 @@ class BinaryDictionaryFormat { static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize); - static AK_FORCE_INLINE int getHeaderSize( - const uint8_t *const dict, const FORMAT_VERSION format) { - switch (format) { - case VERSION_1: - return FORMAT_VERSION_1_HEADER_SIZE; - case VERSION_2: - // See the format of the header in the comment in detectFormat() above - return ByteArrayUtils::readUint32(dict, 8); - default: - return S_INT_MAX; - } - } - private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormat); static const int DICTIONARY_MINIMUM_SIZE; static const uint32_t FORMAT_VERSION_1_MAGIC_NUMBER; - static const int FORMAT_VERSION_1_HEADER_SIZE; static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER; static const int FORMAT_VERSION_2_MINIMUM_SIZE; - static const int VERSION_2_MAGIC_NUMBER_SIZE; - static const int VERSION_2_DICTIONARY_VERSION_SIZE ; - static const int VERSION_2_DICTIONARY_FLAG_SIZE; }; } // namespace latinime -#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_H */ +#endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp new file mode 100644 index 000000000..04bb81f71 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/dictionary/binary_dictionary_header.h" + +#include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" + +namespace latinime { + +const char *const BinaryDictionaryHeader::MULTIPLE_WORDS_DEMOTION_RATE_KEY = + "MULTIPLE_WORDS_DEMOTION_RATE"; +const float BinaryDictionaryHeader::DEFAULT_MULTI_WORD_COST_MULTIPLIER = 1.0f; +const float BinaryDictionaryHeader::MULTI_WORD_COST_MULTIPLIER_SCALE = 100.0f; + +BinaryDictionaryHeader::BinaryDictionaryHeader( + const BinaryDictionaryInfo *const binaryDictionaryInfo) + : mBinaryDictionaryInfo(binaryDictionaryInfo), + mDictionaryFlags(BinaryDictionaryHeaderReader::getFlags(binaryDictionaryInfo)), + mSize(BinaryDictionaryHeaderReader::getHeaderSize(binaryDictionaryInfo)), + mMultiWordCostMultiplier(readMultiWordCostMultiplier()) {} + +float BinaryDictionaryHeader::readMultiWordCostMultiplier() const { + const int headerValue = BinaryDictionaryHeaderReader::readHeaderValueInt( + mBinaryDictionaryInfo, MULTIPLE_WORDS_DEMOTION_RATE_KEY); + if (headerValue == S_INT_MIN) { + // The key is absent or its value is not a number; fall back to the default. + return DEFAULT_MULTI_WORD_COST_MULTIPLIER; + } + if (headerValue <= 0) { // Avoids dividing by zero below. + return static_cast<float>(MAX_VALUE_FOR_WEIGHTING); + } + return MULTI_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(headerValue); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h new file mode 100644 index 000000000..9db000362 --- /dev/null +++ 
b/native/jni/src/suggest/core/dictionary/binary_dictionary_header.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_BINARY_DICTIONARY_HEADER_H +#define LATINIME_BINARY_DICTIONARY_HEADER_H + +#include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h" + +namespace latinime { + +class BinaryDictionaryInfo; + +/** + * This class abstracts dictionary header structures and provides an interface to access + * dictionary header information. 
+ */ +class BinaryDictionaryHeader { + public: + explicit BinaryDictionaryHeader(const BinaryDictionaryInfo *const binaryDictionaryInfo); + + AK_FORCE_INLINE int getSize() const { + return mSize; + } + + AK_FORCE_INLINE bool supportsDynamicUpdate() const { + return BinaryDictionaryHeaderReader::supportsDynamicUpdate(mDictionaryFlags); + } + + AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { + return BinaryDictionaryHeaderReader::requiresGermanUmlautProcessing(mDictionaryFlags); + } + + AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const { + return BinaryDictionaryHeaderReader::requiresFrenchLigatureProcessing(mDictionaryFlags); + } + + AK_FORCE_INLINE float getMultiWordCostMultiplier() const { + return mMultiWordCostMultiplier; + } + + private: + DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryHeader); + + static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY; + static const float DEFAULT_MULTI_WORD_COST_MULTIPLIER; + static const float MULTI_WORD_COST_MULTIPLIER_SCALE; + + const BinaryDictionaryInfo *const mBinaryDictionaryInfo; + const BinaryDictionaryHeaderReader::DictionaryFlags mDictionaryFlags; + const int mSize; + const float mMultiWordCostMultiplier; + + float readMultiWordCostMultiplier() const; +}; +} // namespace latinime +#endif // LATINIME_BINARY_DICTIONARY_HEADER_H diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp new file mode 100644 index 000000000..c09a78f03 --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/dictionary/binary_dictionary_header_reading_utils.h" + +#include +#include + +#include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" + +namespace latinime { + +const int BinaryDictionaryHeaderReader::MAX_OPTION_KEY_LENGTH = 256; + +const int BinaryDictionaryHeaderReader::FORMAT_VERSION_1_HEADER_SIZE = 5; + +const int BinaryDictionaryHeaderReader::VERSION_2_MAGIC_NUMBER_SIZE = 4; +const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_VERSION_SIZE = 2; +const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_FLAG_SIZE = 2; +const int BinaryDictionaryHeaderReader::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4; + +const BinaryDictionaryHeaderReader::DictionaryFlags BinaryDictionaryHeaderReader::NO_FLAGS = 0; +// Flags for special processing +// Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or +// something very bad (like, the apocalypse) will happen. Please update both at the same time. 
+const BinaryDictionaryHeaderReader::DictionaryFlags + BinaryDictionaryHeaderReader::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; +const BinaryDictionaryHeaderReader::DictionaryFlags + BinaryDictionaryHeaderReader::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; +const BinaryDictionaryHeaderReader::DictionaryFlags + BinaryDictionaryHeaderReader::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; + +/* static */ int BinaryDictionaryHeaderReader::getHeaderSize( + const BinaryDictionaryInfo *const binaryDictionaryInfo) { + switch (binaryDictionaryInfo->getFormat()) { + case BinaryDictionaryFormat::VERSION_1: + return FORMAT_VERSION_1_HEADER_SIZE; + case BinaryDictionaryFormat::VERSION_2: + // See the format of the header in the comment in + // BinaryDictionaryFormatUtils::detectFormatVersion() + return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), + VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE + + VERSION_2_DICTIONARY_FLAG_SIZE); + default: + return S_INT_MAX; + } +} + +/* static */ BinaryDictionaryHeaderReader::DictionaryFlags BinaryDictionaryHeaderReader::getFlags( + const BinaryDictionaryInfo *const binaryDictionaryInfo) { + switch (binaryDictionaryInfo->getFormat()) { + case BinaryDictionaryFormat::VERSION_1: + return NO_FLAGS; + case BinaryDictionaryFormat::VERSION_2: + return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), + VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE); + default: + return NO_FLAGS; + } +} + +// Returns if the key is found or not and reads the found value into outValue. 
+/* static */ bool BinaryDictionaryHeaderReader::readHeaderValue( + const BinaryDictionaryInfo *const binaryDictionaryInfo, + const char *const key, int *outValue, const int outValueSize) { + if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) { + return false; + } + const int headerSize = getHeaderSize(binaryDictionaryInfo); + int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); + while (pos < headerSize) { + if (ByteArrayUtils::compareStringInBufferWithCharArray( + binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { + // The key was found. + ByteArrayUtils::readStringAndAdvancePosition( + binaryDictionaryInfo->getDictBuf(), outValueSize, outValue, &pos); + return true; + } + ByteArrayUtils::advancePositionToBehindString( + binaryDictionaryInfo->getDictBuf(), headerSize - pos, &pos); + } + // The key was not found. + return false; +} + +/* static */ int BinaryDictionaryHeaderReader::readHeaderValueInt( + const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key) { + const int bufferSize = LARGEST_INT_DIGIT_COUNT; + int intBuffer[bufferSize] = {}; // Zero-filled: unwritten slots act as the terminator. + char charBuffer[bufferSize + 1] = {}; // One spare slot keeps atoi() inside the buffer. + if (!readHeaderValue(binaryDictionaryInfo, key, intBuffer, bufferSize)) { + return S_INT_MIN; + } + for (int i = 0; i < bufferSize; ++i) { + charBuffer[i] = intBuffer[i]; + if (charBuffer[i] == '\0') { + break; + } + if (!isdigit(charBuffer[i])) { + // If not a number, return S_INT_MIN + return S_INT_MIN; + } + } + return (charBuffer[0] == '\0') ? S_INT_MIN : atoi(charBuffer); // Empty is not a number. +} + +} // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h new file mode 100644 index 000000000..6e9dca73c --- /dev/null +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_DICTIONARY_HEADER_READING_UTILS_H +#define LATINIME_DICTIONARY_HEADER_READING_UTILS_H + +#include + +#include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_format_utils.h" + +namespace latinime { + +class BinaryDictionaryInfo; + +class BinaryDictionaryHeaderReader { + public: + typedef uint16_t DictionaryFlags; + + static const int MAX_OPTION_KEY_LENGTH; + + static int getHeaderSize(const BinaryDictionaryInfo *const binaryDictionaryInfo); + + static DictionaryFlags getFlags(const BinaryDictionaryInfo *const binaryDictionaryInfo); + + static AK_FORCE_INLINE bool supportsDynamicUpdate(const DictionaryFlags flags) { + return (flags & SUPPORTS_DYNAMIC_UPDATE_FLAG) != 0; + } + + static AK_FORCE_INLINE bool requiresGermanUmlautProcessing(const DictionaryFlags flags) { + return (flags & GERMAN_UMLAUT_PROCESSING_FLAG) != 0; + } + + static AK_FORCE_INLINE bool requiresFrenchLigatureProcessing(const DictionaryFlags flags) { + return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; + } + + static AK_FORCE_INLINE bool hasHeaderAttributes( + const BinaryDictionaryFormat::FORMAT_VERSION format) { + // Only format 2 and above have header attributes as {key,value} string pairs. 
+ switch (format) { + case BinaryDictionaryFormat::VERSION_2: + return true; + break; + default: + return false; + } + } + + static AK_FORCE_INLINE int getHeaderOptionsPosition( + const BinaryDictionaryFormat::FORMAT_VERSION format) { + switch (format) { + case BinaryDictionaryFormat::VERSION_2: + return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE + + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; + break; + default: + return 0; + } + } + + static bool readHeaderValue( + const BinaryDictionaryInfo *const binaryDictionaryInfo, + const char *const key, int *outValue, const int outValueSize); + + static int readHeaderValueInt( + const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key); + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReader); + + static const int FORMAT_VERSION_1_HEADER_SIZE; + + static const int VERSION_2_MAGIC_NUMBER_SIZE; + static const int VERSION_2_DICTIONARY_VERSION_SIZE; + static const int VERSION_2_DICTIONARY_FLAG_SIZE; + static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; + + static const DictionaryFlags NO_FLAGS; + // Flags for special processing + // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAGS) or + // something very bad (like, the apocalypse) will happen. Please update both at the same time. 
+ static const DictionaryFlags GERMAN_UMLAUT_PROCESSING_FLAG; + static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; + static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; + static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; +}; +} +#endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h index 8508c6786..0b77e5ee9 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_info.h @@ -20,16 +20,19 @@ #include #include "defines.h" -#include "suggest/core/dictionary/binary_dictionary_format.h" +#include "suggest/core/dictionary/binary_dictionary_format_utils.h" +#include "suggest/core/dictionary/binary_dictionary_header.h" namespace latinime { +class BinaryDictionaryHeader; + class BinaryDictionaryInfo { public: BinaryDictionaryInfo(const uint8_t *const dictBuf, const int dictSize) : mDictBuf(dictBuf), - mFormat(BinaryDictionaryFormat::detectFormatVersion(mDictBuf, dictSize)), - mDictRoot(mDictBuf + BinaryDictionaryFormat::getHeaderSize(mDictBuf, mFormat)) {} + mDictionaryFormat(BinaryDictionaryFormat::detectFormatVersion(mDictBuf, dictSize)), + mDictionaryHeader(this), mDictRoot(mDictBuf + mDictionaryHeader.getSize()) {} AK_FORCE_INLINE const uint8_t *getDictBuf() const { return mDictBuf; @@ -40,18 +43,23 @@ class BinaryDictionaryInfo { } AK_FORCE_INLINE BinaryDictionaryFormat::FORMAT_VERSION getFormat() const { - return mFormat; + return mDictionaryFormat; } AK_FORCE_INLINE int getRootPosition() const { return 0; } + AK_FORCE_INLINE const BinaryDictionaryHeader *getHeader() const { + return &mDictionaryHeader; + } + private: DISALLOW_COPY_AND_ASSIGN(BinaryDictionaryInfo); const uint8_t *const mDictBuf; - const BinaryDictionaryFormat::FORMAT_VERSION mFormat; + const BinaryDictionaryFormat::FORMAT_VERSION mDictionaryFormat; + const 
BinaryDictionaryHeader mDictionaryHeader; const uint8_t *const mDictRoot; }; } diff --git a/native/jni/src/suggest/core/dictionary/binary_format.h b/native/jni/src/suggest/core/dictionary/binary_format.h index c82065f97..f580bdad5 100644 --- a/native/jni/src/suggest/core/dictionary/binary_format.h +++ b/native/jni/src/suggest/core/dictionary/binary_format.h @@ -17,7 +17,6 @@ #ifndef LATINIME_BINARY_FORMAT_H #define LATINIME_BINARY_FORMAT_H -#include #include #include "suggest/core/dictionary/bloom_filter.h" @@ -61,17 +60,9 @@ class BinaryFormat { // Mask and flags for attribute address type selection. static const int MASK_ATTRIBUTE_ADDRESS_TYPE = 0x30; - static const int UNKNOWN_FORMAT = -1; static const int SHORTCUT_LIST_SIZE_SIZE = 2; - static int detectFormat(const uint8_t *const dict, const int dictSize); - static int getHeaderSize(const uint8_t *const dict, const int dictSize); - static int getFlags(const uint8_t *const dict, const int dictSize); static bool hasBlacklistedOrNotAWordFlag(const int flags); - static void readHeaderValue(const uint8_t *const dict, const int dictSize, - const char *const key, int *outValue, const int outValueSize); - static int readHeaderValueInt(const uint8_t *const dict, const int dictSize, - const char *const key); static int getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos); static uint8_t getFlagsAndForwardPointer(const uint8_t *const dict, int *pos); static int getCodePointAndForwardPointer(const uint8_t *const dict, int *pos); @@ -93,20 +84,11 @@ class BinaryFormat { int *outWord, int *outUnigramProbability); static int getBigramProbabilityFromHashMap(const int position, const hash_map_compat *bigramMap, const int unigramProbability); - static float getMultiWordCostMultiplier(const uint8_t *const dict, const int dictSize); static void fillBigramProbabilityToHashMap(const uint8_t *const root, int position, hash_map_compat *bigramMap); static int getBigramProbability(const uint8_t *const root, int position, 
const int nextPosition, const int unigramProbability); - // Flags for special processing - // Those *must* match the flags in makedict (BinaryDictInputOutput#*_PROCESSING_FLAG) or - // something very bad (like, the apocalypse) will happen. Please update both at the same time. - enum { - REQUIRES_GERMAN_UMLAUT_PROCESSING = 0x1, - REQUIRES_FRENCH_LIGATURES_PROCESSING = 0x4 - }; - private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryFormat); static int getBigramListPositionForWordPosition(const uint8_t *const root, int position); @@ -119,20 +101,6 @@ class BinaryFormat { static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES = 0x20; static const int FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES = 0x30; - // Any file smaller than this is not a dictionary. - static const int DICTIONARY_MINIMUM_SIZE = 4; - // Originally, format version 1 had a 16-bit magic number, then the version number `01' - // then options that must be 0. Hence the first 32-bits of the format are always as follow - // and it's okay to consider them a magic number as a whole. - static const int FORMAT_VERSION_1_MAGIC_NUMBER = 0x78B10100; - static const int FORMAT_VERSION_1_HEADER_SIZE = 5; - // The versions of Latin IME that only handle format version 1 only test for the magic - // number, so we had to change it so that version 2 files would be rejected by older - // implementations. On this occasion, we made the magic number 32 bits long. 
- static const int FORMAT_VERSION_2_MAGIC_NUMBER = -1681835266; // 0x9BC13AFE - // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 - static const int FORMAT_VERSION_2_MINIMUM_SIZE = 12; - static const int CHARACTER_ARRAY_TERMINATOR_SIZE = 1; static const int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; static const int CHARACTER_ARRAY_TERMINATOR = 0x1F; @@ -142,122 +110,10 @@ class BinaryFormat { static int skipBigrams(const uint8_t *const dict, const uint8_t flags, const int pos); }; -AK_FORCE_INLINE int BinaryFormat::detectFormat(const uint8_t *const dict, const int dictSize) { - // The magic number is stored big-endian. - // If the dictionary is less than 4 bytes, we can't even read the magic number, so we don't - // understand this format. - if (dictSize < DICTIONARY_MINIMUM_SIZE) return UNKNOWN_FORMAT; - const int magicNumber = (dict[0] << 24) + (dict[1] << 16) + (dict[2] << 8) + dict[3]; - switch (magicNumber) { - case FORMAT_VERSION_1_MAGIC_NUMBER: - // Format 1 header is exactly 5 bytes long and looks like: - // Magic number (2 bytes) 0x78 0xB1 - // Version number (1 byte) 0x01 - // Options (2 bytes) must be 0x00 0x00 - return 1; - case FORMAT_VERSION_2_MAGIC_NUMBER: - // Version 2 dictionaries are at least 12 bytes long (see below details for the header). - // If this dictionary has the version 2 magic number but is less than 12 bytes long, then - // it's an unknown format and we need to avoid confidently reading the next bytes. 
- if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) return UNKNOWN_FORMAT; - // Format 2 header is as follows: - // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE - // Version number (2 bytes) 0x00 0x02 - // Options (2 bytes) - // Header size (4 bytes) : integer, big endian - return (dict[4] << 8) + dict[5]; - default: - return UNKNOWN_FORMAT; - } -} - -inline int BinaryFormat::getFlags(const uint8_t *const dict, const int dictSize) { - switch (detectFormat(dict, dictSize)) { - case 1: - return NO_FLAGS; // TODO: NO_FLAGS is unused anywhere else? - default: - return (dict[6] << 8) + dict[7]; - } -} - inline bool BinaryFormat::hasBlacklistedOrNotAWordFlag(const int flags) { return (flags & (FLAG_IS_BLACKLISTED | FLAG_IS_NOT_A_WORD)) != 0; } -inline int BinaryFormat::getHeaderSize(const uint8_t *const dict, const int dictSize) { - switch (detectFormat(dict, dictSize)) { - case 1: - return FORMAT_VERSION_1_HEADER_SIZE; - case 2: - // See the format of the header in the comment in detectFormat() above - return (dict[8] << 24) + (dict[9] << 16) + (dict[10] << 8) + dict[11]; - default: - return S_INT_MAX; - } -} - -inline void BinaryFormat::readHeaderValue(const uint8_t *const dict, const int dictSize, - const char *const key, int *outValue, const int outValueSize) { - int outValueIndex = 0; - // Only format 2 and above have header attributes as {key,value} string pairs. For prior - // formats, we just return an empty string, as if the key wasn't found. 
- if (2 <= detectFormat(dict, dictSize)) { - const int headerOptionsOffset = 4 /* magic number */ - + 2 /* dictionary version */ + 2 /* flags */; - const int headerSize = - (dict[headerOptionsOffset] << 24) + (dict[headerOptionsOffset + 1] << 16) - + (dict[headerOptionsOffset + 2] << 8) + dict[headerOptionsOffset + 3]; - const int headerEnd = headerOptionsOffset + 4 + headerSize; - int index = headerOptionsOffset + 4; - while (index < headerEnd) { - int keyIndex = 0; - int codePoint = getCodePointAndForwardPointer(dict, &index); - while (codePoint != NOT_A_CODE_POINT) { - if (codePoint != key[keyIndex++]) { - break; - } - codePoint = getCodePointAndForwardPointer(dict, &index); - } - if (codePoint == NOT_A_CODE_POINT && key[keyIndex] == 0) { - // We found the key! Copy and return the value. - codePoint = getCodePointAndForwardPointer(dict, &index); - while (codePoint != NOT_A_CODE_POINT && outValueIndex < outValueSize) { - outValue[outValueIndex++] = codePoint; - codePoint = getCodePointAndForwardPointer(dict, &index); - } - // Finished copying. Break to go to the termination code. - break; - } - // We didn't find the key, skip the remainder of it and its value - while (codePoint != NOT_A_CODE_POINT) { - codePoint = getCodePointAndForwardPointer(dict, &index); - } - codePoint = getCodePointAndForwardPointer(dict, &index); - while (codePoint != NOT_A_CODE_POINT) { - codePoint = getCodePointAndForwardPointer(dict, &index); - } - } - // We couldn't find it - fall through and return an empty value. 
- } - // Put a terminator 0 if possible at all (always unless outValueSize is <= 0) - if (outValueIndex >= outValueSize) outValueIndex = outValueSize - 1; - if (outValueIndex >= 0) outValue[outValueIndex] = 0; -} - -inline int BinaryFormat::readHeaderValueInt(const uint8_t *const dict, const int dictSize, - const char *const key) { - const int bufferSize = LARGEST_INT_DIGIT_COUNT; - int intBuffer[bufferSize]; - char charBuffer[bufferSize]; - BinaryFormat::readHeaderValue(dict, dictSize, key, intBuffer, bufferSize); - for (int i = 0; i < bufferSize; ++i) { - charBuffer[i] = intBuffer[i]; - } - // If not a number, return S_INT_MIN - if (!isdigit(charBuffer[0])) return S_INT_MIN; - return atoi(charBuffer); -} - AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t *const dict, int *pos) { const int msb = dict[(*pos)++]; @@ -265,18 +121,6 @@ AK_FORCE_INLINE int BinaryFormat::getGroupCountAndForwardPointer(const uint8_t * return ((msb & 0x7F) << 8) | dict[(*pos)++]; } -inline float BinaryFormat::getMultiWordCostMultiplier(const uint8_t *const dict, - const int dictSize) { - const int headerValue = readHeaderValueInt(dict, dictSize, "MULTIPLE_WORDS_DEMOTION_RATE"); - if (headerValue == S_INT_MIN) { - return 1.0f; - } - if (headerValue <= 0) { - return static_cast(MAX_VALUE_FOR_WEIGHTING); - } - return 100.0f / static_cast(headerValue); -} - inline uint8_t BinaryFormat::getFlagsAndForwardPointer(const uint8_t *const dict, int *pos) { return dict[(*pos)++]; } diff --git a/native/jni/src/suggest/core/dictionary/byte_array_utils.h b/native/jni/src/suggest/core/dictionary/byte_array_utils.h index 832b74725..d3321f624 100644 --- a/native/jni/src/suggest/core/dictionary/byte_array_utils.h +++ b/native/jni/src/suggest/core/dictionary/byte_array_utils.h @@ -116,8 +116,8 @@ class ByteArrayUtils { * Reads code points until the terminator is found. */ // Returns the length of the string. 
- static int readStringAndAdvancePosition(const uint8_t *const buffer, int *const pos, - int *const outBuffer, const int maxLength) { + static int readStringAndAdvancePosition(const uint8_t *const buffer, + const int maxLength, int *const outBuffer, int *const pos) { int length = 0; int codePoint = readCodePointAndAdvancePosition(buffer, pos); while (NOT_A_CODE_POINT != codePoint && length < maxLength) { @@ -129,7 +129,7 @@ class ByteArrayUtils { // Advances the position and returns the length of the string. static int advancePositionToBehindString( - const uint8_t *const buffer, int *const pos, const int maxLength) { + const uint8_t *const buffer, const int maxLength, int *const pos) { int length = 0; int codePoint = readCodePointAndAdvancePosition(buffer, pos); while (NOT_A_CODE_POINT != codePoint && length < maxLength) { @@ -138,6 +138,39 @@ class ByteArrayUtils { return length; } + // Returns an integer less than, equal to, or greater than zero when string starting from pos + // in buffer is less than, match, or is greater than charArray. + static AK_FORCE_INLINE int compareStringInBufferWithCharArray(const uint8_t *const buffer, + const char *const charArray, const int maxLength, int *const pos) { + int index = 0; + int codePoint = readCodePointAndAdvancePosition(buffer, pos); + const uint8_t *const uint8CharArrayForComparison = + reinterpret_cast(charArray); + while (NOT_A_CODE_POINT != codePoint + && '\0' != uint8CharArrayForComparison[index] && index < maxLength) { + if (codePoint != uint8CharArrayForComparison[index]) { + // Different character is found. + // Skip the rest of the string in the buffer. + advancePositionToBehindString(buffer, maxLength - index, pos); + return codePoint - uint8CharArrayForComparison[index]; + } + // Advance + codePoint = readCodePointAndAdvancePosition(buffer, pos); + ++index; + } + if (NOT_A_CODE_POINT != codePoint && index < maxLength) { + // Skip the rest of the string in the buffer. 
+ advancePositionToBehindString(buffer, maxLength - index, pos); + } + if (NOT_A_CODE_POINT == codePoint && '\0' == uint8CharArrayForComparison[index]) { + // When both of the last characters are terminals, we consider the string in the buffer + // matches the given char array + return 0; + } else { + return codePoint - uint8CharArrayForComparison[index]; + } + } + private: DISALLOW_IMPLICIT_CONSTRUCTORS(ByteArrayUtils); diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 2d4ad5df5..561e22d2d 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -33,11 +33,10 @@ namespace latinime { Dictionary::Dictionary(void *dict, int dictSize, int mmapFd, int dictBufAdjust) - : mBinaryDicitonaryInfo(static_cast(dict), dictSize), + : mBinaryDictionaryInfo(static_cast(dict), dictSize), mDictSize(dictSize), - mDictFlags(BinaryFormat::getFlags(mBinaryDicitonaryInfo.getDictBuf(), dictSize)), mMmapFd(mmapFd), mDictBufAdjust(dictBufAdjust), - mBigramDictionary(new BigramDictionary(&mBinaryDicitonaryInfo)), + mBigramDictionary(new BigramDictionary(&mBinaryDictionaryInfo)), mGestureSuggest(new Suggest(GestureSuggestPolicyFactory::getGestureSuggestPolicy())), mTypingSuggest(new Suggest(TypingSuggestPolicyFactory::getTypingSuggestPolicy())) { } @@ -85,7 +84,7 @@ int Dictionary::getBigrams(const int *word, int length, int *inputCodePoints, in } int Dictionary::getProbability(const int *word, int length) const { - const uint8_t *const root = mBinaryDicitonaryInfo.getDictRoot(); + const uint8_t *const root = mBinaryDictionaryInfo.getDictRoot(); int pos = BinaryFormat::getTerminalPosition(root, word, length, false /* forceLowerCaseSearch */); if (NOT_VALID_WORD == pos) { @@ -112,8 +111,4 @@ bool Dictionary::isValidBigram(const int *word1, int length1, const int *word2, return mBigramDictionary->isValidBigram(word1, length1, word2, length2); } 
-int Dictionary::getDictFlags() const { - return mDictFlags; -} - } // namespace latinime diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 1f25080b1..151f26183 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -66,22 +66,20 @@ class Dictionary { int getProbability(const int *word, int length) const; bool isValidBigram(const int *word1, int length1, const int *word2, int length2) const; const BinaryDictionaryInfo *getBinaryDictionaryInfo() const { - return &mBinaryDicitonaryInfo; + return &mBinaryDictionaryInfo; } int getDictSize() const { return mDictSize; } int getMmapFd() const { return mMmapFd; } int getDictBufAdjust() const { return mDictBufAdjust; } - int getDictFlags() const; virtual ~Dictionary(); private: DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary); - const BinaryDictionaryInfo mBinaryDicitonaryInfo; + const BinaryDictionaryInfo mBinaryDictionaryInfo; // Used only for the mmap version of dictionary loading, but we use these as dummy variables // also for the malloc version. 
const int mDictSize; - const int mDictFlags; const int mMmapFd; const int mDictBufAdjust; diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp index f53e56ef1..af378b1b7 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.cpp @@ -16,8 +16,10 @@ #include "suggest/core/dictionary/digraph_utils.h" +#include + #include "defines.h" -#include "suggest/core/dictionary/binary_format.h" +#include "suggest/core/dictionary/binary_dictionary_header.h" #include "utils/char_utils.h" namespace latinime { @@ -33,8 +35,8 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = { DIGRAPH_TYPE_GERMAN_UMLAUT, DIGRAPH_TYPE_FRENCH_LIGATURES }; /* static */ bool DigraphUtils::hasDigraphForCodePoint( - const int dictFlags, const int compositeGlyphCodePoint) { - const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags); + const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint) { + const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(header); if (DigraphUtils::getDigraphForDigraphTypeAndCodePoint(digraphType, compositeGlyphCodePoint)) { return true; } @@ -43,24 +45,16 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = // Returns the digraph type associated with the given dictionary. 
/* static */ DigraphUtils::DigraphType DigraphUtils::getDigraphTypeForDictionary( - const int dictFlags) { - if (BinaryFormat::REQUIRES_GERMAN_UMLAUT_PROCESSING & dictFlags) { + const BinaryDictionaryHeader *const header) { + if (header->requiresGermanUmlautProcessing()) { return DIGRAPH_TYPE_GERMAN_UMLAUT; } - if (BinaryFormat::REQUIRES_FRENCH_LIGATURES_PROCESSING & dictFlags) { + if (header->requiresFrenchLigatureProcessing()) { return DIGRAPH_TYPE_FRENCH_LIGATURES; } return DIGRAPH_TYPE_NONE; } -// Retrieves the set of all digraphs associated with the given dictionary flags. -// Returns the size of the digraph array, or 0 if none exist. -/* static */ int DigraphUtils::getAllDigraphsForDictionaryAndReturnSize( - const int dictFlags, const DigraphUtils::digraph_t **const digraphs) { - const DigraphUtils::DigraphType digraphType = getDigraphTypeForDictionary(dictFlags); - return getAllDigraphsForDigraphTypeAndReturnSize(digraphType, digraphs); -} - // Returns the digraph codepoint for the given composite glyph codepoint and digraph codepoint index // (which specifies the first or second codepoint in the digraph). 
/* static */ int DigraphUtils::getDigraphCodePointForIndex(const int compositeGlyphCodePoint, @@ -124,7 +118,7 @@ const DigraphUtils::DigraphType DigraphUtils::USED_DIGRAPH_TYPES[] = const DigraphUtils::digraph_t *digraphs = 0; const int compositeGlyphLowerCodePoint = CharUtils::toLowerCase(compositeGlyphCodePoint); const int digraphsSize = - DigraphUtils::getAllDigraphsForDictionaryAndReturnSize(digraphType, &digraphs); + DigraphUtils::getAllDigraphsForDigraphTypeAndReturnSize(digraphType, &digraphs); for (int i = 0; i < digraphsSize; i++) { if (digraphs[i].compositeGlyph == compositeGlyphLowerCodePoint) { return &digraphs[i]; diff --git a/native/jni/src/suggest/core/dictionary/digraph_utils.h b/native/jni/src/suggest/core/dictionary/digraph_utils.h index c1205940c..9d74fe3a6 100644 --- a/native/jni/src/suggest/core/dictionary/digraph_utils.h +++ b/native/jni/src/suggest/core/dictionary/digraph_utils.h @@ -21,6 +21,8 @@ namespace latinime { +class BinaryDictionaryHeader; + class DigraphUtils { public: typedef enum { @@ -37,17 +39,14 @@ class DigraphUtils { typedef struct { int first; int second; int compositeGlyph; } digraph_t; - static bool hasDigraphForCodePoint(const int dictFlags, const int compositeGlyphCodePoint); - static int getAllDigraphsForDictionaryAndReturnSize( - const int dictFlags, const digraph_t **const digraphs); - static int getDigraphCodePointForIndex(const int dictFlags, const int compositeGlyphCodePoint, - const DigraphCodePointIndex digraphCodePointIndex); + static bool hasDigraphForCodePoint( + const BinaryDictionaryHeader *const header, const int compositeGlyphCodePoint); static int getDigraphCodePointForIndex(const int compositeGlyphCodePoint, const DigraphCodePointIndex digraphCodePointIndex); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DigraphUtils); - static DigraphType getDigraphTypeForDictionary(const int dictFlags); + static DigraphType getDigraphTypeForDictionary(const BinaryDictionaryHeader *const header); static int 
getAllDigraphsForDigraphTypeAndReturnSize( const DigraphType digraphType, const digraph_t **const digraphs); static const digraph_t *getDigraphForCodePoint(const int compositeGlyphCodePoint); diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index c398caefa..774d6074e 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -19,6 +19,7 @@ #include "defines.h" #include "jni.h" #include "suggest/core/dicnode/dic_node_utils.h" +#include "suggest/core/dictionary/binary_dictionary_header.h" #include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/binary_format.h" #include "suggest/core/dictionary/dictionary.h" @@ -28,9 +29,8 @@ namespace latinime { void DicTraverseSession::init(const Dictionary *const dictionary, const int *prevWord, int prevWordLength, const SuggestOptions *const suggestOptions) { mDictionary = dictionary; - mMultiWordCostMultiplier = BinaryFormat::getMultiWordCostMultiplier( - mDictionary->getBinaryDictionaryInfo()->getDictBuf(), - mDictionary->getDictSize()); + mMultiWordCostMultiplier = mDictionary->getBinaryDictionaryInfo() + ->getHeader()->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; if (!prevWord) { mPrevWordPos = NOT_VALID_WORD; @@ -63,10 +63,6 @@ const BinaryDictionaryInfo *DicTraverseSession::getBinaryDictionaryInfo() const return mDictionary->getBinaryDictionaryInfo(); } -int DicTraverseSession::getDictFlags() const { - return mDictionary->getDictFlags(); -} - void DicTraverseSession::resetCache(const int nextActiveCacheSize, const int maxWords) { mDicNodesCache.reset(nextActiveCacheSize, maxWords); mMultiBigramMap.clear(); diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 630b3b59b..f95a0b23d 100644 --- 
a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -77,7 +77,6 @@ class DicTraverseSession { // TODO: Remove const BinaryDictionaryInfo *getBinaryDictionaryInfo() const; - int getDictFlags() const; //-------------------- // getters and setters diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp index 1f108e400..6c4a6c166 100644 --- a/native/jni/src/suggest/core/suggest.cpp +++ b/native/jni/src/suggest/core/suggest.cpp @@ -19,6 +19,7 @@ #include "suggest/core/dicnode/dic_node.h" #include "suggest/core/dicnode/dic_node_priority_queue.h" #include "suggest/core/dicnode/dic_node_vector.h" +#include "suggest/core/dictionary/binary_dictionary_info.h" #include "suggest/core/dictionary/dictionary.h" #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/dictionary/shortcut_utils.h" @@ -294,7 +295,8 @@ void Suggest::expandCurrentDicNodes(DicTraverseSession *traverseSession) const { processDicNodeAsMatch(traverseSession, childDicNode); continue; } - if (DigraphUtils::hasDigraphForCodePoint(traverseSession->getDictFlags(), + if (DigraphUtils::hasDigraphForCodePoint( + traverseSession->getBinaryDictionaryInfo()->getHeader(), childDicNode->getNodeCodePoint())) { correctionDicNode.initByCopy(childDicNode); correctionDicNode.advanceDigraphIndex();