diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java index 83ee982b1..7ab90880c 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java @@ -541,7 +541,9 @@ public final class BinaryDictionary extends Dictionary { close(); final File dictFile = new File(mDictFilePath); final File tmpDictFile = new File(tmpDictFilePath); - FileUtils.deleteRecursively(dictFile); + if (!FileUtils.deleteRecursively(dictFile)) { + return false; + } if (!BinaryDictionaryUtils.renameDict(tmpDictFile, dictFile)) { return false; } diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java index e3bed318e..2aa9ca31c 100644 --- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java +++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java @@ -119,8 +119,9 @@ abstract public class ExpandableBinaryDictionary extends Dictionary { } private boolean needsToMigrateDictionary(final int formatVersion) { - // TODO: Check version. - return false; + // When we bump up the dictionary format version, the old version should be added to here + // for supporting migration. Note that native code has to support reading such formats. + return formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING; } public boolean isValidDictionaryLocked() { diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java index f25503488..613ff2ba4 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java +++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java @@ -186,7 +186,12 @@ public final class FormatSpec { // From version 4 on, we use version * 100 + revision as a version number. That allows // us to change the format during development while having testing devices remove // older files with each upgrade, while still having a readable versioning scheme. + // When we bump up the dictionary format version, we should update + // ExpandableDictionary.needsToMigrateDictionary() and + // ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType(). public static final int VERSION2 = 2; + // Dictionary version used for testing. + public static final int VERSION4_ONLY_FOR_TESTING = 399; public static final int VERSION4 = 401; static final int MINIMUM_SUPPORTED_VERSION = VERSION2; static final int MAXIMUM_SUPPORTED_VERSION = VERSION4; diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h index e4a6dc594..da24302c2 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h @@ -139,6 +139,8 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy { switch (mDictFormatVersion) { case FormatUtils::VERSION_2: return FormatUtils::VERSION_2; + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: + return FormatUtils::VERSION_4_ONLY_FOR_TESTING; case FormatUtils::VERSION_4: return FormatUtils::VERSION_4; default: diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp index 5608e27d4..2a9028a9e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp @@ -98,8 +98,9 @@ typedef DictionaryHeaderStructurePolicy::AttributeMap AttributeMap; case FormatUtils::VERSION_2: // Version 2 dictionary writing is not supported. return false; + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: - return buffer->writeUintAndAdvancePosition(FormatUtils::VERSION_4 /* data */, + return buffer->writeUintAndAdvancePosition(version /* data */, HEADER_DICTIONARY_VERSION_SIZE, writingPos); default: return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index be7e43b98..c4d18608c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -52,9 +52,11 @@ namespace latinime { DictionaryStructureWithBufferPolicyFactory:: newPolicyForOnMemoryDict( const int formatVersion, const std::vector &locale, const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) { - switch (formatVersion) { + FormatUtils::FORMAT_VERSION dictFormatVersion = FormatUtils::getFormatVersion(formatVersion); + switch (dictFormatVersion) { + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: { - HeaderPolicy headerPolicy(FormatUtils::VERSION_4, locale, attributeMap); + HeaderPolicy headerPolicy(dictFormatVersion, locale, attributeMap); Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers(&headerPolicy, Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE); @@ -87,11 +89,13 @@ namespace latinime { if (!mmappedBuffer) { return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr); } - switch (FormatUtils::detectFormatVersion(mmappedBuffer->getBuffer(), - mmappedBuffer->getBufferSize())) { + const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::detectFormatVersion( + mmappedBuffer->getBuffer(), mmappedBuffer->getBufferSize()); + switch (formatVersion) { case FormatUtils::VERSION_2: AKLOGE("Given path is a directory but the format is version 2. path: %s", path); break; + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: { const int dictDirPathBufSize = strlen(headerFilePath) + 1 /* terminator */; char dictPath[dictDirPathBufSize]; @@ -102,7 +106,8 @@ namespace latinime { return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr); } Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( - Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer))); + Ver4DictBuffers::openVer4DictBuffers(dictPath, std::move(mmappedBuffer), + formatVersion)); if (!dictBuffers || !dictBuffers->isValid()) { AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements. path: %s", path); @@ -135,6 +140,7 @@ namespace latinime { case FormatUtils::VERSION_2: return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( new PatriciaTriePolicy(std::move(mmappedBuffer))); + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: AKLOGE("Given path is a file but the format is version 4. path: %s", path); break; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index 95f654498..77ed38b89 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -27,7 +27,8 @@ namespace latinime { /* static */ Ver4DictBuffers::Ver4DictBuffersPtr Ver4DictBuffers::openVer4DictBuffers( - const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer) { + const char *const dictPath, MmappedBuffer::MmappedBufferPtr headerBuffer, + const FormatUtils::FORMAT_VERSION formatVersion) { if (!headerBuffer) { ASSERT(false); AKLOGE("The header buffer must be valid to open ver4 dict buffers."); @@ -35,7 +36,8 @@ namespace latinime { } // TODO: take only dictDirPath, and open both header and trie files in the constructor below const bool isUpdatable = headerBuffer->isUpdatable(); - return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable)); + return Ver4DictBuffersPtr(new Ver4DictBuffers(dictPath, std::move(headerBuffer), isUpdatable, + formatVersion)); } bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, @@ -113,11 +115,12 @@ bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, } Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath, - MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable) + MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable, + const FormatUtils::FORMAT_VERSION formatVersion) : mHeaderBuffer(std::move(headerBuffer)), mDictBuffer(MmappedBuffer::openBuffer(dictPath, Ver4DictConstants::TRIE_FILE_EXTENSION, isUpdatable)), - mHeaderPolicy(mHeaderBuffer->getBuffer(), FormatUtils::VERSION_4), + mHeaderPolicy(mHeaderBuffer->getBuffer(), formatVersion), mExpandableHeaderBuffer(mHeaderBuffer ? mHeaderBuffer->getBuffer() : nullptr, mHeaderPolicy.getSize(), BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE), diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index fc41432f4..df177c14a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -36,7 +36,8 @@ class Ver4DictBuffers { typedef std::unique_ptr Ver4DictBuffersPtr; static Ver4DictBuffersPtr openVer4DictBuffers(const char *const dictDirPath, - MmappedBuffer::MmappedBufferPtr headerBuffer); + MmappedBuffer::MmappedBufferPtr headerBuffer, + const FormatUtils::FORMAT_VERSION formatVersion); static AK_FORCE_INLINE Ver4DictBuffersPtr createVer4DictBuffers( const HeaderPolicy *const headerPolicy, const int maxTrieSize) { @@ -120,7 +121,8 @@ class Ver4DictBuffers { DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); Ver4DictBuffers(const char *const dictDirPath, - const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable); + const MmappedBuffer::MmappedBufferPtr headerBuffer, const bool isUpdatable, + const FormatUtils::FORMAT_VERSION formatVersion); Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 87fa5994c..7bc7b0a48 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -34,9 +34,12 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = const int dictVersion, const std::vector localeAsCodePointVector, const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) { TimeKeeper::setCurrentTime(); - switch (dictVersion) { + const FormatUtils::FORMAT_VERSION formatVersion = FormatUtils::getFormatVersion(dictVersion); + switch (formatVersion) { + case FormatUtils::VERSION_4_ONLY_FOR_TESTING: case FormatUtils::VERSION_4: - return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap); + return createEmptyV4DictFile(filePath, localeAsCodePointVector, attributeMap, + formatVersion); default: AKLOGE("Cannot create dictionary %s because format version %d is not supported.", filePath, dictVersion); @@ -46,8 +49,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = /* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath, const std::vector localeAsCodePointVector, - const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap) { - HeaderPolicy headerPolicy(FormatUtils::VERSION_4, localeAsCodePointVector, attributeMap); + const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap, + const FormatUtils::FORMAT_VERSION formatVersion) { + HeaderPolicy headerPolicy(formatVersion, localeAsCodePointVector, attributeMap); Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers( Ver4DictBuffers::createVer4DictBuffers(&headerPolicy, Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE)); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h index 54ec651f7..a822989db 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -21,6 +21,7 @@ #include "defines.h" #include "suggest/policyimpl/dictionary/header/header_read_write_utils.h" +#include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { @@ -46,7 +47,8 @@ class DictFileWritingUtils { static bool createEmptyV4DictFile(const char *const filePath, const std::vector localeAsCodePointVector, - const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap); + const DictionaryHeaderStructurePolicy::AttributeMap *const attributeMap, + const FormatUtils::FORMAT_VERSION formatVersion); static bool flushBufferToFile(const char *const filePath, const BufferWithExtendableBuffer *const buffer); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp index cd3c403fa..a8518cdca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp @@ -25,6 +25,18 @@ const uint32_t FormatUtils::MAGIC_NUMBER = 0x9BC13AFE; // Magic number (4 bytes), version (2 bytes), flags (2 bytes), header size (4 bytes) = 12 const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; +/* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) { + switch (formatVersion) { + case VERSION_2: + return VERSION_2; + case VERSION_4_ONLY_FOR_TESTING: + return VERSION_4_ONLY_FOR_TESTING; + case VERSION_4: + return VERSION_4; + default: + return UNKNOWN_VERSION; + } +} /* static */ FormatUtils::FORMAT_VERSION FormatUtils::detectFormatVersion( const uint8_t *const dict, const int dictSize) { // The magic number is stored big-endian. @@ -46,6 +58,8 @@ const int FormatUtils::DICTIONARY_MINIMUM_SIZE = 12; // same so we use them for both here. if (ByteArrayUtils::readUint16(dict, 4) == VERSION_2) { return VERSION_2; + } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4_ONLY_FOR_TESTING) { + return VERSION_4_ONLY_FOR_TESTING; } else if (ByteArrayUtils::readUint16(dict, 4) == VERSION_4) { return VERSION_4; } else { diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h index 759b1c9b2..20dfb9d8c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h @@ -31,6 +31,7 @@ class FormatUtils { enum FORMAT_VERSION { // These MUST have the same values as the relevant constants in FormatSpec.java. VERSION_2 = 2, + VERSION_4_ONLY_FOR_TESTING = 399, VERSION_4 = 401, UNKNOWN_VERSION = -1 }; @@ -39,6 +40,7 @@ class FormatUtils { // unsupported or obsolete dictionary formats. static const uint32_t MAGIC_NUMBER; + static FORMAT_VERSION getFormatVersion(const int formatVersion); static FORMAT_VERSION detectFormatVersion(const uint8_t *const dict, const int dictSize); private: diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index 0fb0fa587..9ceafa705 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -46,21 +46,23 @@ public class BinaryDictionaryTests extends AndroidTestCase { private File createEmptyDictionaryAndGetFile(final String dictId, final int formatVersion) throws IOException { - if (formatVersion == FormatSpec.VERSION4) { - return createEmptyVer4DictionaryAndGetFile(dictId); + if (formatVersion == FormatSpec.VERSION4 + || formatVersion == FormatSpec.VERSION4_ONLY_FOR_TESTING) { + return createEmptyVer4DictionaryAndGetFile(dictId, formatVersion); } else { throw new IOException("Dictionary format version " + formatVersion + " is not supported."); } } - private File createEmptyVer4DictionaryAndGetFile(final String dictId) throws IOException { + private File createEmptyVer4DictionaryAndGetFile(final String dictId, + final int formatVersion) throws IOException { final File file = File.createTempFile(dictId, TEST_DICT_FILE_EXTENSION, getContext().getCacheDir()); file.delete(); file.mkdir(); Map attributeMap = new HashMap(); - if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), FormatSpec.VERSION4, + if (BinaryDictionaryUtils.createEmptyDictFile(file.getAbsolutePath(), formatVersion, Locale.ENGLISH, attributeMap)) { return file; } else { @@ -1223,4 +1225,36 @@ public class BinaryDictionaryTests extends AndroidTestCase { } } } + + public void testDictMigration() { + testDictMigration(FormatSpec.VERSION4_ONLY_FOR_TESTING, FormatSpec.VERSION4); + } + + private void testDictMigration(final int fromFormatVersion, final int toFormatVersion) { + File dictFile = null; + try { + dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", fromFormatVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(), + 0 /* offset */, dictFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + final int unigramProbability = 100; + addUnigramWord(binaryDictionary, "aaa", unigramProbability); + addUnigramWord(binaryDictionary, "bbb", unigramProbability); + final int bigramProbability = 10; + addBigramWords(binaryDictionary, "aaa", "bbb", bigramProbability); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + assertTrue(binaryDictionary.isValidBigram("aaa", "bbb")); + assertEquals(fromFormatVersion, binaryDictionary.getFormatVersion()); + assertTrue(binaryDictionary.migrateTo(toFormatVersion)); + assertTrue(binaryDictionary.isValidDictionary()); + assertEquals(toFormatVersion, binaryDictionary.getFormatVersion()); + assertEquals(unigramProbability, binaryDictionary.getFrequency("aaa")); + assertEquals(unigramProbability, binaryDictionary.getFrequency("bbb")); + // TODO: Add tests for bigram frequency when the implementation gets ready. + assertTrue(binaryDictionary.isValidBigram("aaa", "bbb")); + } }