From 484fa7b59cb0659ac18fa68da5c7b641d9255be8 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 18 Sep 2013 18:08:33 +0900 Subject: [PATCH] Add dictionary header writing methods. Bug: 6669677 Change-Id: I392ac4776b55779903cbaa17e683005d80017a41 --- .../dynamic_patricia_trie_policy.cpp | 2 +- .../dictionary/dynamic_patricia_trie_policy.h | 2 +- .../dynamic_patricia_trie_writing_helper.cpp | 10 ++- .../dynamic_patricia_trie_writing_helper.h | 6 +- .../dictionary/header/header_policy.cpp | 60 +++++++++++-- .../dictionary/header/header_policy.h | 26 +++--- .../header/header_reading_utils.cpp | 84 +++++++++++++++---- .../dictionary/header/header_reading_utils.h | 20 ++++- .../dictionary/patricia_trie_policy.h | 2 +- .../dictionary/utils/format_utils.cpp | 26 ++---- .../dictionary/utils/format_utils.h | 6 +- .../latin/BinaryDictionaryTests.java | 4 +- 12 files changed, 181 insertions(+), 67 deletions(-) diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp index 2198a13c9..f91dd0e56 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp @@ -250,7 +250,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) { } DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer, &mBigramListPolicy, &mShortcutListPolicy); - writingHelper.writeToDictFile(filePath, mBuffer->getBuffer(), mHeaderPolicy.getSize()); + writingHelper.writeToDictFile(filePath, &mHeaderPolicy); } void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h index 2cbb0ff3b..ebe1f3212 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h @@ -33,7 +33,7 @@ class DicNodeVector; class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: DynamicPatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(), mBuffer->getBufferSize() - mHeaderPolicy.getSize()), mShortcutListPolicy(&mBufferWithExtendableBuffer), diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp index a67c0d94a..31178fb5c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp @@ -24,6 +24,7 @@ #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_utils.h" +#include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h" #include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h" @@ -137,7 +138,11 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con } void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName, - const uint8_t *const headerBuf, const int headerSize) { + const HeaderPolicy *const headerPolicy) { + BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) { + return; + } const int tmpFileNameBufSize = strlen(fileName) + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1; char tmpFileName[tmpFileNameBufSize]; @@ -148,7 +153,8 @@ void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileNam return; } // Write header. - if (fwrite(headerBuf, headerSize, 1, file) < 1) { + if (fwrite(headerBuffer.getBuffer(true /* usesAdditionalBuffer */), + headerBuffer.getTailPosition(), 1, file) < 1) { fclose(file); remove(tmpFileName); return; diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h index faf7a4e1b..219ea9857 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h +++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h @@ -28,6 +28,7 @@ class DynamicBigramListPolicy; class DynamicPatriciaTrieNodeReader; class DynamicPatriciaTrieReadingHelper; class DynamicShortcutListPolicy; +class HeaderPolicy; class DynamicPatriciaTrieWritingHelper { public: @@ -48,8 +49,9 @@ class DynamicPatriciaTrieWritingHelper { // Remove a bigram relation from word0Pos to word1Pos. bool removeBigramWords(const int word0Pos, const int word1Pos); - void writeToDictFile(const char *const fileName, const uint8_t *const headerBuf, - const int headerSize); + void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy); + + void writeToDictFileWithGC(const char *const fileName, const HeaderPolicy *const headerPolicy); private: DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp index 196da5c97..47ace23a1 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp @@ -17,6 +17,8 @@ #include "suggest/policyimpl/dictionary/header/header_policy.h" #include +#include +#include namespace latinime { @@ -36,7 +38,7 @@ void HeaderPolicy::readHeaderValueOrQuestionMark(const char *const key, int *out } std::vector keyCodePointVector; insertCharactersIntoVector(key, &keyCodePointVector); - HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); + HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyCodePointVector); if (it == mAttributeMap.end()) { // The key was not found. outValue[0] = '?'; @@ -85,7 +87,7 @@ int HeaderPolicy::readLastUpdatedTime() const { bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outValue) const { std::vector keyVector; insertCharactersIntoVector(key, &keyVector); - HeaderReadingUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); + HeaderReadWriteUtils::AttributeMap::const_iterator it = mAttributeMap.find(keyVector); if (it == mAttributeMap.end()) { // The key was not found. return false; @@ -94,10 +96,56 @@ bool HeaderPolicy::getAttributeValueAsInt(const char *const key, int *const outV return true; } -/* static */ HeaderReadingUtils::AttributeMap HeaderPolicy::createAttributeMapAndReadAllAttributes( - const uint8_t *const dictBuf) { - HeaderReadingUtils::AttributeMap attributeMap; - HeaderReadingUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); +bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, + const bool updatesLastUpdatedTime) const { + int writingPos = 0; + if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion, + &writingPos)) { + return false; + } + if (!HeaderReadWriteUtils::writeDictionaryFlags(bufferToWrite, mDictionaryFlags, + &writingPos)) { + return false; + } + // Temporarily writes a dummy header size. + int headerSizeFieldPos = writingPos; + if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, 0 /* size */, + &writingPos)) { + return false; + } + if (updatesLastUpdatedTime) { + // Set current time as a last updated time. + HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap); + std::vector updatedTimekey; + insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey); + const time_t currentTime = time(NULL); + std::vector updatedTimeValue; + char charBuf[LARGEST_INT_DIGIT_COUNT + 1]; + snprintf(charBuf, LARGEST_INT_DIGIT_COUNT + 1, "%ld", currentTime); + insertCharactersIntoVector(charBuf, &updatedTimeValue); + attributeMapTowrite[updatedTimekey] = updatedTimeValue; + if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite, + &writingPos)) { + return false; + } + } else { + if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap, + &writingPos)) { + return false; + } + } + // Writes an actual header size. + if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos, + &headerSizeFieldPos)) { + return false; + } + return true; +} + +/* static */ HeaderReadWriteUtils::AttributeMap + HeaderPolicy::createAttributeMapAndReadAllAttributes(const uint8_t *const dictBuf) { + HeaderReadWriteUtils::AttributeMap attributeMap; + HeaderReadWriteUtils::fetchAllHeaderAttributes(dictBuf, &attributeMap); return attributeMap; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index 930b475c7..cdad7de4d 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -23,14 +23,17 @@ #include "defines.h" #include "suggest/core/policy/dictionary_header_structure_policy.h" #include "suggest/policyimpl/dictionary/header/header_reading_utils.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { class HeaderPolicy : public DictionaryHeaderStructurePolicy { public: - explicit HeaderPolicy(const uint8_t *const dictBuf) - : mDictBuf(dictBuf), mDictionaryFlags(HeaderReadingUtils::getFlags(dictBuf)), - mSize(HeaderReadingUtils::getHeaderSize(dictBuf)), + explicit HeaderPolicy(const uint8_t *const dictBuf, const int dictSize) + : mDictBuf(dictBuf), + mDictFormatVersion(FormatUtils::detectFormatVersion(dictBuf, dictSize)), + mDictionaryFlags(HeaderReadWriteUtils::getFlags(dictBuf)), + mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)), mAttributeMap(createAttributeMapAndReadAllAttributes(mDictBuf)), mMultiWordCostMultiplier(readMultipleWordCostMultiplier()), mUsesForgettingCurve(readUsesForgettingCurveFlag()), @@ -43,16 +46,15 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { } AK_FORCE_INLINE bool supportsDynamicUpdate() const { - return HeaderReadingUtils::supportsDynamicUpdate(mDictionaryFlags); + return HeaderReadWriteUtils::supportsDynamicUpdate(mDictionaryFlags); } AK_FORCE_INLINE bool requiresGermanUmlautProcessing() const { - return HeaderReadingUtils::requiresGermanUmlautProcessing(mDictionaryFlags); + return HeaderReadWriteUtils::requiresGermanUmlautProcessing(mDictionaryFlags); } AK_FORCE_INLINE bool requiresFrenchLigatureProcessing() const { - return HeaderReadingUtils::requiresFrenchLigatureProcessing( - mDictionaryFlags); + return HeaderReadWriteUtils::requiresFrenchLigatureProcessing(mDictionaryFlags); } AK_FORCE_INLINE float getMultiWordCostMultiplier() const { @@ -70,6 +72,9 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { void readHeaderValueOrQuestionMark(const char *const key, int *outValue, int outValueSize) const; + bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite, + const bool updatesLastUpdatedTime) const; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy); @@ -80,9 +85,10 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE; const uint8_t *const mDictBuf; - const HeaderReadingUtils::DictionaryFlags mDictionaryFlags; + const FormatUtils::FORMAT_VERSION mDictFormatVersion; + const HeaderReadWriteUtils::DictionaryFlags mDictionaryFlags; const int mSize; - HeaderReadingUtils::AttributeMap mAttributeMap; + HeaderReadWriteUtils::AttributeMap mAttributeMap; const float mMultiWordCostMultiplier; const bool mUsesForgettingCurve; const int mLastUpdatedTime; @@ -95,7 +101,7 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { bool getAttributeValueAsInt(const char *const key, int *const outValue) const; - static HeaderReadingUtils::AttributeMap createAttributeMapAndReadAllAttributes( + static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes( const uint8_t *const dictBuf); static int parseIntAttributeValue(const std::vector *const attributeValue); diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp index 186c043c1..705355db3 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.cpp @@ -19,43 +19,44 @@ #include #include "defines.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" #include "suggest/policyimpl/dictionary/utils/byte_array_utils.h" namespace latinime { -const int HeaderReadingUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; -const int HeaderReadingUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; +const int HeaderReadWriteUtils::MAX_ATTRIBUTE_KEY_LENGTH = 256; +const int HeaderReadWriteUtils::MAX_ATTRIBUTE_VALUE_LENGTH = 256; -const int HeaderReadingUtils::HEADER_MAGIC_NUMBER_SIZE = 4; -const int HeaderReadingUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; -const int HeaderReadingUtils::HEADER_FLAG_SIZE = 2; -const int HeaderReadingUtils::HEADER_SIZE_FIELD_SIZE = 4; +const int HeaderReadWriteUtils::HEADER_MAGIC_NUMBER_SIZE = 4; +const int HeaderReadWriteUtils::HEADER_DICTIONARY_VERSION_SIZE = 2; +const int HeaderReadWriteUtils::HEADER_FLAG_SIZE = 2; +const int HeaderReadWriteUtils::HEADER_SIZE_FIELD_SIZE = 4; -const HeaderReadingUtils::DictionaryFlags HeaderReadingUtils::NO_FLAGS = 0; +const HeaderReadWriteUtils::DictionaryFlags HeaderReadWriteUtils::NO_FLAGS = 0; // Flags for special processing // Those *must* match the flags in makedict (FormatSpec#*_PROCESSING_FLAG) or // something very bad (like, the apocalypse) will happen. Please update both at the same time. -const HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; -const HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; -const HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::GERMAN_UMLAUT_PROCESSING_FLAG = 0x1; +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::SUPPORTS_DYNAMIC_UPDATE_FLAG = 0x2; +const HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::FRENCH_LIGATURE_PROCESSING_FLAG = 0x4; -/* static */ int HeaderReadingUtils::getHeaderSize(const uint8_t *const dictBuf) { +/* static */ int HeaderReadWriteUtils::getHeaderSize(const uint8_t *const dictBuf) { // See the format of the header in the comment in // BinaryDictionaryFormatUtils::detectFormatVersion() return ByteArrayUtils::readUint32(dictBuf, HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE + HEADER_FLAG_SIZE); } -/* static */ HeaderReadingUtils::DictionaryFlags - HeaderReadingUtils::getFlags(const uint8_t *const dictBuf) { +/* static */ HeaderReadWriteUtils::DictionaryFlags + HeaderReadWriteUtils::getFlags(const uint8_t *const dictBuf) { return ByteArrayUtils::readUint16(dictBuf, HEADER_MAGIC_NUMBER_SIZE + HEADER_DICTIONARY_VERSION_SIZE); } -/* static */ void HeaderReadingUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, +/* static */ void HeaderReadWriteUtils::fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes) { const int headerSize = getHeaderSize(dictBuf); int pos = getHeaderOptionsPosition(); @@ -78,4 +79,53 @@ const HeaderReadingUtils::DictionaryFlags } } +/* static */ bool HeaderReadWriteUtils::writeDictionaryVersion( + BufferWithExtendableBuffer *const buffer, const FormatUtils::FORMAT_VERSION version, + int *const writingPos) { + if (!buffer->writeUintAndAdvancePosition(FormatUtils::MAGIC_NUMBER, HEADER_MAGIC_NUMBER_SIZE, + writingPos)) { + return false; + } + switch (version) { + case FormatUtils::VERSION_2: + // Version 2 dictionary writing is not supported. + return false; + case FormatUtils::VERSION_3: + return buffer->writeUintAndAdvancePosition(3 /* data */, + HEADER_DICTIONARY_VERSION_SIZE, writingPos); + default: + return false; + } +} + +/* static */ bool HeaderReadWriteUtils::writeDictionaryFlags( + BufferWithExtendableBuffer *const buffer, const DictionaryFlags flags, + int *const writingPos) { + return buffer->writeUintAndAdvancePosition(flags, HEADER_FLAG_SIZE, writingPos); +} + +/* static */ bool HeaderReadWriteUtils::writeDictionaryHeaderSize( + BufferWithExtendableBuffer *const buffer, const int size, int *const writingPos) { + return buffer->writeUintAndAdvancePosition(size, HEADER_SIZE_FIELD_SIZE, writingPos); +} + +/* static */ bool HeaderReadWriteUtils::writeHeaderAttributes( + BufferWithExtendableBuffer *const buffer, const AttributeMap *const headerAttributes, + int *const writingPos) { + for (AttributeMap::const_iterator it = headerAttributes->begin(); + it != headerAttributes->end(); ++it) { + // Write a key. + if (!buffer->writeCodePointsAndAdvancePosition(&(it->first.at(0)), it->first.size(), + true /* writesTerminator */, writingPos)) { + return false; + } + // Write a value. + if (!buffer->writeCodePointsAndAdvancePosition(&(it->second.at(0)), it->second.size(), + true /* writesTerminator */, writingPos)) { + return false; + } + } + return true; +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h index 5716198fb..4b4e2205a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_reading_utils.h @@ -22,10 +22,14 @@ #include #include "defines.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { -class HeaderReadingUtils { +class BufferWithExtendableBuffer; + +// TODO: Change this file name to header_read_write_utils.h. +class HeaderReadWriteUtils { public: typedef uint16_t DictionaryFlags; typedef std::map, std::vector > AttributeMap; @@ -54,8 +58,20 @@ class HeaderReadingUtils { static void fetchAllHeaderAttributes(const uint8_t *const dictBuf, AttributeMap *const headerAttributes); + static bool writeDictionaryVersion(BufferWithExtendableBuffer *const buffer, + const FormatUtils::FORMAT_VERSION version, int *const writingPos); + + static bool writeDictionaryFlags(BufferWithExtendableBuffer *const buffer, + const DictionaryFlags flags, int *const writingPos); + + static bool writeDictionaryHeaderSize(BufferWithExtendableBuffer *const buffer, + const int size, int *const writingPos); + + static bool writeHeaderAttributes(BufferWithExtendableBuffer *const buffer, + const AttributeMap *const headerAttributes, int *const writingPos); + private: - DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadingUtils); + DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderReadWriteUtils); static const int MAX_ATTRIBUTE_KEY_LENGTH; static const int MAX_ATTRIBUTE_VALUE_LENGTH; diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h index cee3e4ab2..697d0159c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h @@ -34,7 +34,7 @@ class DicNodeVector; class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { public: PatriciaTriePolicy(const MmappedBuffer *const buffer) - : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer()), + : mBuffer(buffer), mHeaderPolicy(mBuffer->getBuffer(), buffer->getBufferSize()), mDictRoot(mBuffer->getBuffer() + mHeaderPolicy.getSize()), mBigramListPolicy(mDictRoot), mShortcutListPolicy(mDictRoot) {} diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index 3796c7b7b..1d77d5c27 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -20,20 +20,10 @@ namespace latinime { -/** - * Dictionary size - */ -// Any file smaller than this is not a dictionary. -const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 4; +const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE; -/** - * Format versions - */ -// 32 bit magic number is stored at the beginning of the dictionary header to reject unsupported -// or obsolete dictionary formats. -const uint32_t FormatUtils::HEADER_VERSION_2_MAGIC_NUMBER = 0x9BC13AFE; -// Magic number (4 bytes), version (2 bytes), options (2 bytes), header size (4 bytes) = 12 -const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; +// Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12 +const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; /* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion( const uint8_t *const dict, const int dictSize) { @@ -45,16 +35,10 @@ const int FormatUtils::HEADER_VERSION_2_MINIMUM_SIZE = 12; } const uint32_t magicNumber = ByteArrayUtils::readUint32(dict, 0); switch (magicNumber) { - case HEADER_VERSION_2_MAGIC_NUMBER: - // Version 2 header are at least 12 bytes long. - // If this header has the version 2 magic number but is less than 12 bytes long, - // then it's an unknown format and we need to avoid confidently reading the next bytes. - if (dictSize < HEADER_VERSION_2_MINIMUM_SIZE) { - return UNKNOWN_VERSION; - } + case MAGIC_NUMBER: // Version 2 header is as follows: // Magic number (4 bytes) 0x9B 0xC1 0x3A 0xFE - // Version number (2 bytes) + // Dictionary format version number (2 bytes) // Options (2 bytes) // Header size (4 bytes) : integer, big endian if (ByteArrayUtils::readUint16(dict, 4) == 2) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index f84321577..79ed0de29 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -34,14 +34,16 @@ class FormatUtils { UNKNOWN_VERSION }; + // 32 bit magic number is stored at the beginning of the dictionary header to reject + // unsupported or obsolete dictionary formats. + static const uint32_t MAGIC_NUMBER; + static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize); private: DISALLOW_IMPLICIT_CONSTRUCTORS(FormatUtils); static const int DICTIONARY_MINIMUM_SIZE; - static const uint32_t HEADER_VERSION_2_MAGIC_NUMBER; - static const int HEADER_VERSION_2_MINIMUM_SIZE; }; } // namespace latinime #endif /* LATINIME_FORMAT_UTILS_H */ diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index d8105ba38..00d76c990 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -358,8 +358,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertEquals(-1, binaryDictionary.getFrequency("aaa")); - assertEquals(-1, binaryDictionary.getFrequency("abcd")); + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); binaryDictionary.addUnigramWord("aaa", probability); binaryDictionary.addUnigramWord("abcd", probability);