From dc14d5fbba553c39929539ca8a71f534383d7e0a Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 20 Nov 2013 14:12:04 +0900 Subject: [PATCH] Implement Ver4PatriciaTriePolicy::flush(). Bug: 11073222 Change-Id: I7da5f7f8b7918ce6cc35d36e1ce993840cad797c --- native/jni/Android.mk | 3 +- ...y_structure_with_buffer_policy_factory.cpp | 14 +++--- .../content/terminal_position_lookup_table.h | 24 ++++++++- .../structure/v4/ver4_dict_buffers.cpp | 8 +-- .../structure/v4/ver4_dict_buffers.h | 11 +++- .../v4/ver4_patricia_trie_policy.cpp | 6 ++- .../structure/v4/ver4_patricia_trie_policy.h | 3 ++ .../v4/ver4_patricia_trie_writing_helper.cpp | 50 +++++++++++++++++++ .../v4/ver4_patricia_trie_writing_helper.h | 42 ++++++++++++++++ .../dictionary/utils/file_utils.cpp | 39 +++++++++++++++ .../policyimpl/dictionary/utils/file_utils.h | 7 +++ .../latin/Ver4BinaryDictionaryTests.java | 47 +++++++++++++++++ 12 files changed, 237 insertions(+), 17 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h diff --git a/native/jni/Android.mk b/native/jni/Android.mk index e770d9866..2c5401ba5 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -97,7 +97,8 @@ LATIN_IME_CORE_SRC_FILES := \ ver4_patricia_trie_node_reader.cpp \ ver4_patricia_trie_node_writer.cpp \ ver4_patricia_trie_policy.cpp \ - ver4_patricia_trie_reading_utils.cpp ) \ + ver4_patricia_trie_reading_utils.cpp \ + ver4_patricia_trie_writing_helper.cpp) \ $(addprefix suggest/policyimpl/dictionary/utils/, \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp index 063b84cbf..903d55307 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp @@ -17,13 +17,13 @@ #include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h" #include -#include #include "defines.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" @@ -49,17 +49,15 @@ namespace latinime { return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr( new DynamicPatriciaTriePolicy(mmappedBuffer)); case FormatUtils::VERSION_4: { - std::string dictDirPath(path); - const std::string::size_type pos = - dictDirPath.rfind(Ver4DictConstants::TRIE_FILE_EXTENSION); - if (pos == std::string::npos) { + const int dictDirPathBufSize = strlen(path) + 1 /* terminator */; + char dictDirPath[dictDirPathBufSize]; + if (!FileUtils::getFilePathWithoutSuffix(path, Ver4DictConstants::TRIE_FILE_EXTENSION, + dictDirPathBufSize, dictDirPath)) { // Dictionary file name is not valid as a version 4 dictionary. return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0); } - // Removing extension to get the base path. - dictDirPath.erase(pos); const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = - Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer); + Ver4DictBuffers::openVer4DictBuffers(dictDirPath, mmappedBuffer); if (!dictBuffers.get()->isValid()) { AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements."); ASSERT(false); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index eaf18b56a..873b2406c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -67,8 +67,28 @@ class TerminalPositionLookupTable : public SingleDictContent { return mSize; } - bool flushToFile(const char *const dictDirPath) const { - return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + bool flushToFile(const char *const dictDirPath, const int newHeaderRegionSize) const { + const int headerRegionSizeDiff = newHeaderRegionSize - mHeaderRegionSize; + // If header region size has been changed, terminal PtNode positions have to be adjusted + // depending on the new header region size. + if (headerRegionSizeDiff != 0) { + TerminalPositionLookupTable lookupTableToWrite; + for (int i = 0; i < mSize; ++i) { + const int terminalPtNodePosition = getTerminalPtNodePosition(i) + + headerRegionSizeDiff; + if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) { + AKLOGE("Cannot set terminal position to lookupTableToWrite." + " terminalId: %d, position: %d", i, terminalPtNodePosition); + return false; + } + } + return lookupTableToWrite.flush(dictDirPath, + Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + } else { + // We can simply use this lookup table because the header region size has not been + // changed. + return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + } } private: diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp index e17c5eab4..d31253153 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -25,7 +25,8 @@ namespace latinime { -bool Ver4DictBuffers::flush(const char *const dictDirPath) const { +bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath, + const BufferWithExtendableBuffer *const headerBuffer) const { // Create temporary directory. const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath, DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); @@ -38,8 +39,7 @@ bool Ver4DictBuffers::flush(const char *const dictDirPath) const { return false; } // Write trie file. - const BufferWithExtendableBuffer *buffers[] = - {&mExpandableHeaderBuffer, &mExpandableTrieBuffer}; + const BufferWithExtendableBuffer *buffers[] = {headerBuffer, &mExpandableTrieBuffer}; if (!DictFileWritingUtils::flushBuffersToFileInDir(tmpDirPath, Ver4DictConstants::TRIE_FILE_EXTENSION, buffers, 2 /* bufferCount */)) { AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath, @@ -47,7 +47,7 @@ bool Ver4DictBuffers::flush(const char *const dictDirPath) const { return false; } // Write dictionary contents. - if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath)) { + if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath, headerBuffer->getTailPosition())) { AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath); return false; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index 0684bdd0c..bfd0bbdfe 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -57,6 +57,10 @@ class Ver4DictBuffers { return &mExpandableTrieBuffer; } + AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const { + return &mExpandableTrieBuffer; + } + AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() { return &mTerminalPositionLookupTable; } @@ -89,7 +93,12 @@ class Ver4DictBuffers { return mIsUpdatable; } - bool flush(const char *const dictDirPath) const; + bool flush(const char *const dictDirPath) const { + return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer); + } + + bool flushHeaderAndDictBuffers(const char *const dictDirPath, + const BufferWithExtendableBuffer *const headerBuffer) const; private: DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 698483a79..8ee15e0ef 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -216,7 +216,11 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int } void Ver4PatriciaTriePolicy::flush(const char *const filePath) { - // TODO: Implement. + if (!mBuffers.get()->isUpdatable()) { + AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath); + return; + } + mWritingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount); } void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) { diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index e8fdf5513..605de96a7 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -26,6 +26,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" namespace latinime { @@ -50,6 +51,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { &mShortcutPolicy), mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter, mHeaderPolicy.isDecayingDict()), + mWritingHelper(mBuffers.get()), mUnigramCount(mHeaderPolicy.getUnigramCount()), mBigramCount(mHeaderPolicy.getBigramCount()) {}; @@ -120,6 +122,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { Ver4PatriciaTrieNodeReader mNodeReader; Ver4PatriciaTrieNodeWriter mNodeWriter; DynamicPatriciaTrieUpdatingHelper mUpdatingHelper; + Ver4PatriciaTrieWritingHelper mWritingHelper; int mUnigramCount; int mBigramCount; }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp new file mode 100644 index 000000000..c85a632d3 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h" + +#include + +#include "suggest/policyimpl/dictionary/header/header_policy.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" +#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" + +namespace latinime { + +void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePath, + const HeaderPolicy *const headerPolicy, const int unigramCount, + const int bigramCount) const { + const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */; + char dirPath[dirPathBufSize]; + FileUtils::getDirPath(trieFilePath, dirPathBufSize, dirPath); + BufferWithExtendableBuffer headerBuffer( + BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); + const int extendedRegionSize = headerPolicy->getExtendedRegionSize() + + mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize(); + if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */, + false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) { + AKLOGE("Cannot write header structure to buffer. updatesLastUpdatedTime: %d, " + "updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, " + "extendedRegionSize: %d", false, false, unigramCount, bigramCount, + extendedRegionSize); + return; + } + mBuffers->flushHeaderAndDictBuffers(dirPath, &headerBuffer); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h new file mode 100644 index 000000000..80d631527 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H +#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H + +#include "defines.h" + +namespace latinime { + +class HeaderPolicy; +class Ver4DictBuffers; + +class Ver4PatriciaTrieWritingHelper { + public: + Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers) + : mBuffers(buffers) {} + + void writeToDictFile(const char *const trieFilePath, const HeaderPolicy *const headerPolicy, + const int unigramCount, const int bigramCount) const; + + private: + DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper); + + Ver4DictBuffers *const mBuffers; +}; +} // namespace latinime + +#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */ diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp index 1748d5a49..dedcd7a99 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp @@ -88,4 +88,43 @@ namespace latinime { snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName); } +/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath, + const char *const suffix, const int outDirPathBufSize, char *const outDirPath) { + const int filePathLength = strlen(filePath); + const int suffixLength = strlen(suffix); + if (filePathLength <= suffixLength) { + AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).", + filePath, filePathLength, suffix, suffixLength); + return false; + } + const int resultFilePathLength = filePathLength - suffixLength; + if (outDirPathBufSize <= resultFilePathLength) { + AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d", + filePath, suffix, outDirPathBufSize); + return false; + } + if (strncmp(filePath + resultFilePathLength, suffix, suffixLength) != 0) { + AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix); + return false; + } + snprintf(outDirPath, resultFilePathLength + 1 /* terminator */, "%s", filePath); + return true; +} + +/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize, + char *const outDirPath) { + for (int i = strlen(filePath) - 1; i >= 0; --i) { + if (filePath[i] == '/') { + if (i >= outDirPathBufSize) { + AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d", + filePath, outDirPathBufSize); + ASSERT(false); + return; + } + snprintf(outDirPath, i + 1 /* terminator */, "%s", filePath); + return; + } + } +} + } // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h index fc27aeecb..7dcdef85f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h @@ -39,6 +39,13 @@ class FileUtils { static void getFilePath(const char *const dirPath, const char *const fileName, const int filePathBufSize, char *const outFilePath); + // Returns whether the filePath have the suffix. + static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix, + const int dirPathBufSize, char *const outDirPath); + + static void getDirPath(const char *const filePath, const int dirPathBufSize, + char *const outDirPath); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils); }; diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index 15d990c6d..b51a86b1c 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -250,4 +250,51 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { binaryDictionary.removeBigramWords("bcc", "aaa"); } + public void testFlushDictionary() { + final String dictVersion = Long.toString(System.currentTimeMillis()); + File trieFile = null; + try { + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); + } catch (IOException e) { + fail("IOException while writing an initial dictionary : " + e); + } + BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + final int probability = 100; + binaryDictionary.addUnigramWord("aaa", probability); + binaryDictionary.addUnigramWord("abcd", probability); + // Close without flushing. + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa")); + assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd")); + + binaryDictionary.addUnigramWord("aaa", probability); + binaryDictionary.addUnigramWord("abcd", probability); + binaryDictionary.flush(); + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + + assertEquals(probability, binaryDictionary.getFrequency("aaa")); + assertEquals(probability, binaryDictionary.getFrequency("abcd")); + binaryDictionary.addUnigramWord("bcde", probability); + binaryDictionary.flush(); + binaryDictionary.close(); + + binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, + Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); + assertEquals(probability, binaryDictionary.getFrequency("bcde")); + binaryDictionary.close(); + } + }