diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp index f48386bba..5d14a0554 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.cpp @@ -31,9 +31,9 @@ const int BinaryDictionaryFormatUtils::DICTIONARY_MINIMUM_SIZE = 4; // The versions of Latin IME that only handle format version 1 only test for the magic // number, so we had to change it so that version 2 files would be rejected by older // implementations. On this occasion, we made the magic number 32 bits long. -const uint32_t BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; +const uint32_t BinaryDictionaryFormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 -const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; +const int BinaryDictionaryFormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; /* static */ BinaryDictionaryFormatUtils::FORMAT_VERSION BinaryDictionaryFormatUtils::detectFormatVersion(const uint8_t *const dict, @@ -46,25 +46,28 @@ const int BinaryDictionaryFormatUtils::FORMAT_VERSION_2_MINIMUM_SIZE = 12; } const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); switch (magicNumber) { - case FORMAT_VERSION_2_MAGIC_NUMBER: - // Version 2 dictionaries are at least 12 bytes long. - // If this dictionary has the version 2 magic number but is less than 12 bytes long, - // then it's an unknown format and we need to avoid confidently reading the next bytes. - if (dictSize < FORMAT_VERSION_2_MINIMUM_SIZE) { + case HEADER_VERSION_2_MAGIC_NUMBER: + // Version 2 header are at least 12 bytes long. + // If this header has the version 2 magic number but is less than 12 bytes long, + // then it's an unknown format and we need to avoid confidently reading the next bytes. + if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) { + return UNKNOWN_VERSION; + } + // Version 2 header is as follows: + // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE + // Version number (2 bytes) + // Options (2 bytes) + // Header size (4 bytes) : integer, big endian + if (ByteArrayUtils::readUint16(dict, 4) == 2) { + return VERSION_2; + } else if (ByteArrayUtils::readUint16(dict, 4) == 3) { + // TODO: Support version 3 dictionary. + return UNKNOWN_VERSION; + } else { + return UNKNOWN_VERSION; + } + default: return UNKNOWN_VERSION; - } - // Format 2 header is as follows: - // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE - // Version number (2 bytes) 0x00 0x02 - // Options (2 bytes) - // Header size (4 bytes) : integer, big endian - if (ByteArrayUtils::readUint16(dict, 4) == 2) { - return VERSION_2; - } else { - return UNKNOWN_VERSION; - } - default: - return UNKNOWN_VERSION; } } diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h index 80067b255..830684c70 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_format_utils.h @@ -33,9 +33,9 @@ namespace latinime { */ class BinaryDictionaryFormatUtils { public: - // TODO: Support version 3 format. enum FORMAT_VERSION { - VERSION_2 = 1, + VERSION_2, + VERSION_3, UNKNOWN_VERSION }; @@ -45,8 +45,8 @@ class BinaryDictionaryFormatUtils { DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryFormatUtils); static const int DICTIONARY_MINIMUM_SIZE; - static const uint32_t FORMAT_VERSION_2_MAGIC_NUMBER; - static const int FORMAT_VERSION_2_MINIMUM_SIZE; + static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER; + static const int HEADER_VERSION_2_MINIMUM_SIZE; }; } // namespace latinime #endif /* LATINIME_BINARY_DICTIONARY_FORMAT_UTILS_H */ diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp index 6e1b15ce7..1e437dd63 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.cpp @@ -26,10 +26,10 @@ namespace latinime { const int BinaryDictionaryHeaderReadingUtils::MAX_OPTION_KEY_LENGTH = 256; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_MAGIC_NUMBER_SIZE = 4; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_VERSION_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_FLAG_SIZE = 2; -const int BinaryDictionaryHeaderReadingUtils::VERSION_2_DICTIONARY_HEADER_SIZE_SIZE = 4; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_MAGIC_NUMBER_SIZE = 4; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_DICTIONARY_VERSION_SIZE = 2; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_FLAG_SIZE = 2; +const int BinaryDictionaryHeaderReadingUtils::VERSION_2_HEADER_SIZE_FIELD_SIZE = 4; const BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::NO_FLAGS = 0; @@ -45,13 +45,13 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ int BinaryDictionaryHeaderReadingUtils::getHeaderSize( const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (binaryDictionaryInfo->getFormat()) { - case BinaryDictionaryFormatUtils::VERSION_2: + switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { + case HEADER_VERSION_2: // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() return ByteArrayUtils::readUint32(binaryDictionaryInfo->getDictBuf(), - VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE - + VERSION_2_DICTIONARY_FLAG_SIZE); + VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + + VERSION_2_HEADER_FLAG_SIZE); default: return S_INT_MAX; } @@ -60,10 +60,10 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ BinaryDictionaryHeaderReadingUtils::DictionaryFlags BinaryDictionaryHeaderReadingUtils::getFlags( const BinaryDictionaryInfo *const binaryDictionaryInfo) { - switch (binaryDictionaryInfo->getFormat()) { - case BinaryDictionaryFormatUtils::VERSION_2: + switch (getHeaderVersion(binaryDictionaryInfo->getFormat())) { + case HEADER_VERSION_2: return ByteArrayUtils::readUint16(binaryDictionaryInfo->getDictBuf(), - VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE); + VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE); default: return NO_FLAGS; } @@ -73,11 +73,15 @@ const BinaryDictionaryHeaderReadingUtils::DictionaryFlags /* static */ bool BinaryDictionaryHeaderReadingUtils::readHeaderValue( const BinaryDictionaryInfo *const binaryDictionaryInfo, const char *const key, int *outValue, const int outValueSize) { - if (outValueSize <= 0 || !hasHeaderAttributes(binaryDictionaryInfo->getFormat())) { + if (outValueSize <= 0) { return false; } const int headerSize = getHeaderSize(binaryDictionaryInfo); int pos = getHeaderOptionsPosition(binaryDictionaryInfo->getFormat()); + if (pos == NOT_A_DICT_POS) { + // The header doesn't have header options. + return false; + } while (pos < headerSize) { if(ByteArrayUtils::compareStringInBufferWithCharArray( binaryDictionaryInfo->getDictBuf(), key, headerSize - pos, &pos) == 0) { diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h index 94b9e124d..61748227e 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_header_reading_utils.h @@ -48,27 +48,15 @@ class BinaryDictionaryHeaderReadingUtils { return (flags & FRENCH_LIGATURE_PROCESSING_FLAG) != 0; } - static AK_FORCE_INLINE bool hasHeaderAttributes( - const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { - // Only format 2 and above have header attributes as {key,value} string pairs. - switch (format) { - case BinaryDictionaryFormatUtils::VERSION_2: - return true; - break; - default: - return false; - } - } - static AK_FORCE_INLINE int getHeaderOptionsPosition( - const BinaryDictionaryFormatUtils::FORMAT_VERSION format) { - switch (format) { - case BinaryDictionaryFormatUtils::VERSION_2: - return VERSION_2_MAGIC_NUMBER_SIZE + VERSION_2_DICTIONARY_VERSION_SIZE - + VERSION_2_DICTIONARY_FLAG_SIZE + VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; + const BinaryDictionaryFormatUtils::FORMAT_VERSION dictionaryFormat) { + switch (getHeaderVersion(dictionaryFormat)) { + case HEADER_VERSION_2: + return VERSION_2_HEADER_MAGIC_NUMBER_SIZE + VERSION_2_HEADER_DICTIONARY_VERSION_SIZE + + VERSION_2_HEADER_FLAG_SIZE + VERSION_2_HEADER_SIZE_FIELD_SIZE; break; default: - return 0; + return NOT_A_DICT_POS; } } @@ -82,10 +70,15 @@ class BinaryDictionaryHeaderReadingUtils { private: DISALLOW_IMPLICIT_CONSTRUCTORS(BinaryDictionaryHeaderReadingUtils); - static const int VERSION_2_MAGIC_NUMBER_SIZE; - static const int VERSION_2_DICTIONARY_VERSION_SIZE; - static const int VERSION_2_DICTIONARY_FLAG_SIZE; - static const int VERSION_2_DICTIONARY_HEADER_SIZE_SIZE; + enum HEADER_VERSION { + HEADER_VERSION_2, + UNKNOWN_HEADER_VERSION + }; + + static const int VERSION_2_HEADER_MAGIC_NUMBER_SIZE; + static const int VERSION_2_HEADER_DICTIONARY_VERSION_SIZE; + static const int VERSION_2_HEADER_FLAG_SIZE; + static const int VERSION_2_HEADER_SIZE_FIELD_SIZE; static const DictionaryFlags NO_FLAGS; // Flags for special processing @@ -95,6 +88,18 @@ class BinaryDictionaryHeaderReadingUtils { static const DictionaryFlags SUPPORTS_DYNAMIC_UPDATE_FLAG; static const DictionaryFlags FRENCH_LIGATURE_PROCESSING_FLAG; static const DictionaryFlags CONTAINS_BIGRAMS_FLAG; + + static HEADER_VERSION getHeaderVersion( + const BinaryDictionaryFormatUtils::FORMAT_VERSION formatVersion) { + switch(formatVersion) { + case BinaryDictionaryFormatUtils::VERSION_2: + // Fall through + case BinaryDictionaryFormatUtils::VERSION_3: + return HEADER_VERSION_2; + default: + return UNKNOWN_HEADER_VERSION; + } + } }; } #endif /* LATINIME_DICTIONARY_HEADER_READING_UTILS_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h index c0e24fa4e..70dad67e8 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dictionary_structure_policy_factory.h @@ -32,6 +32,9 @@ class DictionaryStructurePolicyFactory { switch (dictionaryFormat) { case BinaryDictionaryFormatUtils::VERSION_2: return PatriciaTriePolicy::getInstance(); + case BinaryDictionaryFormatUtils::VERSION_3: + // TODO: support version 3 dictionaries. + return 0; default: ASSERT(false); return 0;