From 80bd4a7585e10faf48b7aef001a8f4bb0530f6fa Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Mon, 18 Nov 2013 14:57:53 +0900 Subject: [PATCH] Implement writing empty ver4 dictionary to file. Bug: 11073222 Change-Id: Ic1a9295953d091c8e8982264ffff15669c30544c --- native/jni/Android.mk | 3 + .../v4/content/bigram_dict_content.h | 6 + .../v4/content/probability_dict_content.h | 4 + .../v4/content/shortcut_dict_content.h | 6 + .../v4/content/single_dict_content.h | 7 + .../v4/content/sparse_table_dict_content.cpp | 42 +++++ .../v4/content/sparse_table_dict_content.h | 4 + .../content/terminal_position_lookup_table.h | 4 + .../structure/v4/ver4_dict_buffers.cpp | 81 +++++++++ .../structure/v4/ver4_dict_buffers.h | 5 +- .../utils/dict_file_writing_utils.cpp | 68 +++++--- .../utils/dict_file_writing_utils.h | 10 +- .../dictionary/utils/file_utils.cpp | 91 ++++++++++ .../policyimpl/dictionary/utils/file_utils.h | 32 ++-- .../latin/Ver4BinaryDictionaryTests.java | 164 +++++++----------- 15 files changed, 372 insertions(+), 155 deletions(-) create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp create mode 100644 native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp diff --git a/native/jni/Android.mk b/native/jni/Android.mk index b61a66ce6..e770d9866 100644 --- a/native/jni/Android.mk +++ b/native/jni/Android.mk @@ -91,6 +91,8 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_patricia_trie_writing_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v4/, \ content/bigram_dict_content.cpp \ + content/sparse_table_dict_content.cpp \ + ver4_dict_buffers.cpp \ ver4_dict_constants.cpp \ ver4_patricia_trie_node_reader.cpp \ ver4_patricia_trie_node_writer.cpp \ @@ -100,6 +102,7 @@ LATIN_IME_CORE_SRC_FILES := \ buffer_with_extendable_buffer.cpp \ byte_array_utils.cpp \ dict_file_writing_utils.cpp \ + file_utils.cpp \ forgetting_curve_utils.cpp \ format_utils.cpp \ mmapped_buffer.cpp \ diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h index bc9e4b619..c5410b83f 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/bigram_dict_content.h @@ -59,6 +59,12 @@ class BigramDictContent : public SparseTableDictContent { bool copyBigramList(const int bigramListPos, const int toPos); + bool flushToFile(const char *const dictDirPath) const { + return flush(dictDirPath, Ver4DictConstants::BIGRAM_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::BIGRAM_FILE_EXTENSION); + } + private: DISALLOW_COPY_AND_ASSIGN(BigramDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h index c109cbf51..518376426 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/probability_dict_content.h @@ -62,6 +62,10 @@ class ProbabilityDictContent : public SingleDictContent { Ver4DictConstants::PROBABILITY_SIZE, &probabilityWritingPos); } + bool flushToFile(const char *const dictDirPath) const { + return flush(dictDirPath, Ver4DictConstants::FREQ_FILE_EXTENSION); + } + private: DISALLOW_COPY_AND_ASSIGN(ProbabilityDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h index 8463a1753..b11e23338 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/shortcut_dict_content.h @@ -60,6 +60,12 @@ class ShortcutDictContent : public SparseTableDictContent { return addressLookupTable->get(terminalId); } + bool flushToFile(const char *const dictDirPath) const { + return flush(dictDirPath, Ver4DictConstants::SHORTCUT_LOOKUP_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_CONTENT_TABLE_FILE_EXTENSION, + Ver4DictConstants::SHORTCUT_FILE_EXTENSION); + } + private: DISALLOW_COPY_AND_ASSIGN(ShortcutDictContent); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h index 7669c1eca..08780998e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/single_dict_content.h @@ -21,6 +21,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" namespace latinime { @@ -54,6 +55,12 @@ class SingleDictContent : public DictContent { return &mExpandableContentBuffer; } + bool flush(const char *const dictDirPath, const char *const contentFileName) const { + const BufferWithExtendableBuffer *bufferPtr = &mExpandableContentBuffer; + return DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName, + &bufferPtr, 1 /* bufferCount */); + } + private: DISALLOW_COPY_AND_ASSIGN(SingleDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp new file mode 100644 index 000000000..c65420614 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h" + +namespace latinime { + +bool SparseTableDictContent::flush(const char *const dictDirPath, + const char *const lookupTableFileName, const char *const addressTableFileName, + const char *const contentFileName) const { + const BufferWithExtendableBuffer *lookupTableBufferPtr = &mExpandableLookupTableBuffer; + if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, lookupTableFileName, + &lookupTableBufferPtr, 1 /* bufferCount */)) { + return false; + } + const BufferWithExtendableBuffer *addressTableBufferPtr = &mExpandableAddressTableBuffer; + if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, addressTableFileName, + &addressTableBufferPtr, 1 /* bufferCount */)) { + return false; + } + const BufferWithExtendableBuffer *contentBufferPtr = &mExpandableContentBuffer; + if (!DictFileWritingUtils::flushBuffersToFileInDir(dictDirPath, contentFileName, + &contentBufferPtr, 1 /* bufferCount */)) { + return false; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h index 5ae5f0ff1..bcfecdbfb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/sparse_table_dict_content.h @@ -21,6 +21,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/content/dict_content.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/sparse_table.h" @@ -85,6 +86,9 @@ class SparseTableDictContent : public DictContent { return &mExpandableContentBuffer; } + bool flush(const char *const dictDirPath, const char *const lookupTableFileName, + const char *const addressTableFileName, const char *const contentFileName) const; + private: DISALLOW_IMPLICIT_CONSTRUCTORS(SparseTableDictContent); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h index e016a2b5f..eaf18b56a 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/content/terminal_position_lookup_table.h @@ -67,6 +67,10 @@ class TerminalPositionLookupTable : public SingleDictContent { return mSize; } + bool flushToFile(const char *const dictDirPath) const { + return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION); + } + private: DISALLOW_COPY_AND_ASSIGN(TerminalPositionLookupTable); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp new file mode 100644 index 000000000..e17c5eab4 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" + +#include +#include +#include + +#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" + +namespace latinime { + +bool Ver4DictBuffers::flush(const char *const dictDirPath) const { + // Create temporary directory. + const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath, + DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); + char tmpDirPath[tmpDirPathBufSize]; + FileUtils::getFilePathWithSuffix(dictDirPath, + DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, tmpDirPathBufSize, + tmpDirPath); + if (mkdir(tmpDirPath, S_IRWXU) == -1) { + AKLOGE("Cannot create directory: %s. errno: %d.", tmpDirPath, errno); + return false; + } + // Write trie file. + const BufferWithExtendableBuffer *buffers[] = + {&mExpandableHeaderBuffer, &mExpandableTrieBuffer}; + if (!DictFileWritingUtils::flushBuffersToFileInDir(tmpDirPath, + Ver4DictConstants::TRIE_FILE_EXTENSION, buffers, 2 /* bufferCount */)) { + AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath, + Ver4DictConstants::TRIE_FILE_EXTENSION); + return false; + } + // Write dictionary contents. + if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath)) { + AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath); + return false; + } + if (!mProbabilityDictContent.flushToFile(tmpDirPath)) { + AKLOGE("Probability dict content cannot be written. %s", tmpDirPath); + return false; + } + if (!mBigramDictContent.flushToFile(tmpDirPath)) { + AKLOGE("Bigram dict content cannot be written. %s", tmpDirPath); + return false; + } + if (!mShortcutDictContent.flushToFile(tmpDirPath)) { + AKLOGE("Shortcut dict content cannot be written. %s", tmpDirPath); + return false; + } + // Remove existing dictionary. + if (!FileUtils::removeDirAndFiles(dictDirPath)) { + AKLOGE("Existing directory %s cannot be removed.", dictDirPath); + ASSERT(false); + return false; + } + // Rename temporary directory. + if (rename(tmpDirPath, dictDirPath) != 0) { + AKLOGE("%s cannot be renamed to %s", tmpDirPath, dictDirPath); + ASSERT(false); + return false; + } + return true; +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h index e468be591..0684bdd0c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h @@ -89,10 +89,7 @@ class Ver4DictBuffers { return mIsUpdatable; } - bool flush(const char *const dictDirPath) { - // TODO: Implement. - return false; - } + bool flush(const char *const dictDirPath) const; private: DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers); diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp index 40f7d1f5c..2de8a6012 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp @@ -17,12 +17,12 @@ #include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h" #include -#include #include "suggest/policyimpl/dictionary/header/header_policy.h" #include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_writing_utils.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h" +#include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h" namespace latinime { @@ -36,9 +36,9 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = return createEmptyV3DictFile(filePath, attributeMap); case 4: return createEmptyV4DictFile(filePath, attributeMap); - return false; default: - // Only version 3 dictionary is supported for now. + AKLOGE("Cannot create dictionary %s because format version %d is not supported.", + filePath, dictVersion); return false; } } @@ -54,12 +54,13 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = BufferWithExtendableBuffer bodyBuffer( BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) { + AKLOGE("Empty ver3 dictionary structure cannot be created on memory."); return false; } return flushAllHeaderAndBodyToFile(filePath, &headerBuffer, &bodyBuffer); } -/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const filePath, +/* static */ bool DictFileWritingUtils::createEmptyV4DictFile(const char *const dirPath, const HeaderReadWriteUtils::AttributeMap *const attributeMap) { Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::createVer4DictBuffers(); HeaderPolicy headerPolicy(FormatUtils::VERSION_4, attributeMap); @@ -68,42 +69,59 @@ const char *const DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE = 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */); if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary( dictBuffers.get()->getWritableTrieBuffer(), 0 /* rootPos */)) { + AKLOGE("Empty ver4 dictionary structure cannot be created on memory."); return false; } - return dictBuffers.get()->flush(filePath); + return dictBuffers.get()->flush(dirPath); } /* static */ bool DictFileWritingUtils::flushAllHeaderAndBodyToFile(const char *const filePath, BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody) { - const int tmpFileNameBufSize = strlen(filePath) - + strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1 /* terminator */; + const int tmpFileNameBufSize = FileUtils::getFilePathWithSuffixBufSize(filePath, + TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); // Name of a temporary file used for writing that is a connected string of original name and // TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE. char tmpFileName[tmpFileNameBufSize]; - snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", filePath, - TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE); - FILE *const file = fopen(tmpFileName, "wb"); + FileUtils::getFilePathWithSuffix(filePath, TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE, + tmpFileNameBufSize, tmpFileName); + const BufferWithExtendableBuffer *buffers[] = {dictHeader, dictBody}; + if (!DictFileWritingUtils::flushBuffersToFile(tmpFileName, buffers, 2 /* bufferCount */)) { + AKLOGE("Dictionary structure cannot be written to %s.", tmpFileName); + return false; + } + if (rename(tmpFileName, filePath) != 0) { + AKLOGE("Dictionary file %s cannot be renamed to %s", tmpFileName, filePath);; + } + return true; +} + +/* static */ bool DictFileWritingUtils::flushBuffersToFileInDir(const char *const dirPath, + const char *const fileName, const BufferWithExtendableBuffer **const buffers, + const int bufferCount) { + const int filePathBufSize = FileUtils::getFilePathBufSize(dirPath, fileName); + char filePath[filePathBufSize]; + FileUtils::getFilePath(dirPath, fileName, filePathBufSize, filePath); + return flushBuffersToFile(filePath, buffers, bufferCount); +} + +/* static */ bool DictFileWritingUtils::flushBuffersToFile(const char *const filePath, + const BufferWithExtendableBuffer **const buffers, const int bufferCount) { + FILE *const file = fopen(filePath, "wb"); if (!file) { - AKLOGE("Dictionary file %s cannot be opened.", tmpFileName); + AKLOGE("File %s cannot be opened.", filePath); ASSERT(false); return false; } - // Write the dictionary header. - if (!writeBufferToFile(file, dictHeader)) { - remove(tmpFileName); - AKLOGE("Dictionary header cannot be written. size: %d", dictHeader->getTailPosition()); - ASSERT(false); - return false; - } - // Write the dictionary body. - if (!writeBufferToFile(file, dictBody)) { - remove(tmpFileName); - AKLOGE("Dictionary body cannot be written. size: %d", dictBody->getTailPosition()); - ASSERT(false); - return false; + for (int i = 0; i < bufferCount; ++i) { + if (!writeBufferToFile(file, buffers[i])) { + remove(filePath); + AKLOGE("Buffer cannot be written to the file %s. size: %d", filePath, + buffers[i]->getTailPosition()); + ASSERT(false); + return false; + } } fclose(file); - rename(tmpFileName, filePath); return true; } diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h index 3291f98c7..980a1ff4e 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h @@ -28,6 +28,8 @@ class BufferWithExtendableBuffer; class DictFileWritingUtils { public: + static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; + static bool createEmptyDictFile(const char *const filePath, const int dictVersion, const HeaderReadWriteUtils::AttributeMap *const attributeMap); @@ -35,17 +37,21 @@ class DictFileWritingUtils { BufferWithExtendableBuffer *const dictHeader, BufferWithExtendableBuffer *const dictBody); + static bool flushBuffersToFileInDir(const char *const dirPath, const char *const fileName, + const BufferWithExtendableBuffer **const buffers, const int bufferCount); + private: DISALLOW_IMPLICIT_CONSTRUCTORS(DictFileWritingUtils); - static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE; - static bool createEmptyV3DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap); static bool createEmptyV4DictFile(const char *const filePath, const HeaderReadWriteUtils::AttributeMap *const attributeMap); + static bool flushBuffersToFile(const char *const filePath, + const BufferWithExtendableBuffer **const buffers, const int bufferCount); + static bool writeBufferToFile(FILE *const file, const BufferWithExtendableBuffer *const buffer); }; diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp new file mode 100644 index 000000000..1748d5a49 --- /dev/null +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2013, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/policyimpl/dictionary/utils/file_utils.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace latinime { + +// Returns -1 on error. +/* static */ int FileUtils::getFileSize(const char *const filePath) { + const int fd = open(filePath, O_RDONLY); + if (fd == -1) { + return -1; + } + struct stat statBuf; + if (fstat(fd, &statBuf) != 0) { + close(fd); + return -1; + } + close(fd); + return static_cast(statBuf.st_size); +} + +// Remove a directory and all files in the directory. +/* static */ bool FileUtils::removeDirAndFiles(const char *const dirPath) { + DIR *const dir = opendir(dirPath); + if (dir == NULL) { + AKLOGE("Cannot open dir %s.", dirPath); + return true; + } + struct dirent *dirent; + while ((dirent = readdir(dir)) != NULL) { + if (dirent->d_type != DT_REG) { + continue; + } + const int filePathBufSize = getFilePathBufSize(dirPath, dirent->d_name); + char filePath[filePathBufSize]; + getFilePath(dirPath, dirent->d_name, filePathBufSize, filePath); + if (remove(filePath) != 0) { + AKLOGE("Cannot remove file %s.", filePath); + return false; + } + } + if (remove(dirPath) != 0) { + AKLOGE("Cannot remove directory %s.", dirPath); + return false; + } + return true; +} + +/* static */ int FileUtils::getFilePathWithSuffixBufSize(const char *const filePath, + const char *const suffix) { + return strlen(filePath) + strlen(suffix) + 1 /* terminator */; +} + +/* static */ void FileUtils::getFilePathWithSuffix(const char *const filePath, + const char *const suffix, const int filePathBufSize, char *const outFilePath) { + snprintf(outFilePath, filePathBufSize, "%s%s", filePath, suffix); +} + +/* static */ int FileUtils::getFilePathBufSize(const char *const dirPath, + const char *const fileName) { + return strlen(dirPath) + 1 /* '/' */ + strlen(fileName) + 1 /* terminator */; +} + +/* static */ void FileUtils::getFilePath(const char *const dirPath, const char *const fileName, + const int filePathBufSize, char *const outFilePath) { + snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName); +} + +} // namespace latinime diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h index 59b894fa6..fc27aeecb 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h +++ b/native/jni/src/suggest/policyimpl/dictionary/utils/file_utils.h @@ -17,11 +17,6 @@ #ifndef LATINIME_FILE_UTILS_H #define LATINIME_FILE_UTILS_H -#include -#include -#include -#include - #include "defines.h" namespace latinime { @@ -29,19 +24,20 @@ namespace latinime { class FileUtils { public: // Returns -1 on error. - static int getFileSize(const char *const filePath) { - const int fd = open(filePath, O_RDONLY); - if (fd == -1) { - return -1; - } - struct stat statBuf; - if (fstat(fd, &statBuf) != 0) { - close(fd); - return -1; - } - close(fd); - return static_cast(statBuf.st_size); - } + static int getFileSize(const char *const filePath); + + // Remove a directory and all files in the directory. + static bool removeDirAndFiles(const char *const dirPath); + + static int getFilePathWithSuffixBufSize(const char *const filePath, const char *const suffix); + + static void getFilePathWithSuffix(const char *const filePath, const char *const suffix, + const int filePathBufSize, char *const outFilePath); + + static int getFilePathBufSize(const char *const dirPath, const char *const fileName); + + static void getFilePath(const char *const dirPath, const char *const fileName, + const int filePathBufSize, char *const outFilePath); private: DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils); diff --git a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java index ad57e4c9f..15d990c6d 100644 --- a/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/Ver4BinaryDictionaryTests.java @@ -18,29 +18,21 @@ package com.android.inputmethod.latin; import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; -import android.util.Log; import com.android.inputmethod.latin.makedict.BinaryDictEncoderUtils; -import com.android.inputmethod.latin.makedict.DictEncoder; import com.android.inputmethod.latin.makedict.FormatSpec; -import com.android.inputmethod.latin.makedict.FusionDictionary; -import com.android.inputmethod.latin.makedict.UnsupportedFormatException; -import com.android.inputmethod.latin.makedict.Ver4DictEncoder; -import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Locale; +import java.util.Map; // TODO: Add a test to evaluate the speed of operations of Ver4 dictionary. @LargeTest public class Ver4BinaryDictionaryTests extends AndroidTestCase { - private static final String TAG = Ver4BinaryDictionaryTests.class.getSimpleName(); private static final String TEST_LOCALE = "test"; - private static final FormatSpec.FormatOptions FORMAT_OPTIONS = - new FormatSpec.FormatOptions(4, true /* supportsDynamicUpdate */); + private static final String TEST_DICT_FILE_EXTENSION = ".testDict"; @Override protected void setUp() throws Exception { @@ -52,42 +44,35 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { super.tearDown(); } - // TODO: remove after native code support dictionary creation. - private DictionaryOptions getDictionaryOptions(final String id, final String version) { - final DictionaryOptions options = new DictionaryOptions(new HashMap(), - false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */); - options.mAttributes.put("version", version); - options.mAttributes.put("dictionary", id); - return options; - } - - // TODO: remove after native code support dictionary creation. - private File getTrieFile(final String id, final String version) { - return new File(getContext().getCacheDir() + "/" + id + "." + version, - TEST_LOCALE + "." + version + FormatSpec.TRIE_FILE_EXTENSION); + // Note that dictVersion is different from dictionary format version and it never affects the + // dictionary format. + // TODO: Rename dictVersion to understandable name such as dictRevision. + private File createEmptyDictionaryAndGetTrieFile(final String dictVersion) throws IOException { + final File file = File.createTempFile(dictVersion, TEST_DICT_FILE_EXTENSION, + getContext().getCacheDir()); + file.delete(); + file.mkdir(); + Map attributeMap = new HashMap(); + attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE, + FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE); + if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(), + 4 /* dictVersion */, attributeMap)) { + return new File(file, FormatSpec.TRIE_FILE_EXTENSION); + } else { + throw new IOException("Empty dictionary " + file.getAbsolutePath() + " " + + FormatSpec.TRIE_FILE_EXTENSION + " cannot be created."); + } } public void testIsValidDictionary() { final String dictVersion = Long.toString(System.currentTimeMillis()); - final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); - - BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), - 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, - Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertFalse(binaryDictionary.isValidDictionary()); - - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(TEST_LOCALE, dictVersion)); - final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + File trieFile = null; try { - encoder.writeDictionary(dict, FORMAT_OPTIONS); + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); } catch (IOException e) { - Log.e(TAG, "IOException while writing dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); + fail("IOException while writing an initial dictionary : " + e); } - - binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), + final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); assertTrue(binaryDictionary.isValidDictionary()); @@ -96,27 +81,21 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { // TODO: Add large tests. public void testReadProbability() { final String dictVersion = Long.toString(System.currentTimeMillis()); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(TEST_LOCALE, dictVersion)); - - final int frequency = 100; - dict.add("a", frequency, null, false /* isNotAWord */); - dict.add("aaa", frequency, null, false /* isNotAWord */); - dict.add("ab", frequency, null, false /* isNotAWord */); - - final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + File trieFile = null; try { - encoder.writeDictionary(dict, FORMAT_OPTIONS); + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); } catch (IOException e) { - Log.e(TAG, "IOException while writing dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); + fail("IOException while writing an initial dictionary : " + e); } - final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertTrue(binaryDictionary.isValidDictionary()); + + final int frequency = 100; + binaryDictionary.addUnigramWord("a", frequency); + binaryDictionary.addUnigramWord("aaa", frequency); + binaryDictionary.addUnigramWord("ab", frequency); + assertEquals(frequency, binaryDictionary.getFrequency("a")); assertEquals(frequency, binaryDictionary.getFrequency("aaa")); assertEquals(frequency, binaryDictionary.getFrequency("ab")); @@ -132,40 +111,32 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { // TODO: Add large tests. public void testReadBigrams() { final String dictVersion = Long.toString(System.currentTimeMillis()); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(TEST_LOCALE, dictVersion)); - - final int unigramFrequency = 1; - final int bigramFrequency0 = 150; - final int bigramFrequency1 = 1; - final int bigramFrequency2 = 255; - dict.add("a", unigramFrequency, null, false /* isNotAWord */); - dict.add("aaa", unigramFrequency, null, false /* isNotAWord */); - dict.add("ab", unigramFrequency, null, false /* isNotAWord */); - dict.setBigram("a", "aaa", bigramFrequency0); - dict.setBigram("a", "ab", bigramFrequency1); - dict.setBigram("aaa", "ab", bigramFrequency2); - - final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + File trieFile = null; try { - encoder.writeDictionary(dict, FORMAT_OPTIONS); + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); } catch (IOException e) { - Log.e(TAG, "IOException while writing dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); + fail("IOException while writing an initial dictionary : " + e); } - final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertTrue(binaryDictionary.isValidDictionary()); + final int unigramFrequency = 1; + final int bigramFrequency0 = 10; + final int bigramFrequency1 = 1; + final int bigramFrequency2 = 15; + binaryDictionary.addUnigramWord("a", unigramFrequency); + binaryDictionary.addUnigramWord("aaa", unigramFrequency); + binaryDictionary.addUnigramWord("ab", unigramFrequency); + binaryDictionary.addBigramWords("a", "aaa", bigramFrequency0); + binaryDictionary.addBigramWords("a", "ab", bigramFrequency1); + binaryDictionary.addBigramWords("aaa", "ab", bigramFrequency2); - assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency, + assertEquals(binaryDictionary.calculateProbability(unigramFrequency, bigramFrequency0), binaryDictionary.getBigramProbability("a", "aaa")); - assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency, + assertEquals(binaryDictionary.calculateProbability(unigramFrequency, bigramFrequency1), binaryDictionary.getBigramProbability("a", "ab")); - assertEquals(getCalculatedBigramProbabiliy(binaryDictionary, unigramFrequency, + assertEquals(binaryDictionary.calculateProbability(unigramFrequency, bigramFrequency2), binaryDictionary.getBigramProbability("aaa", "ab")); assertFalse(binaryDictionary.isValidBigram("aaa", "a")); @@ -176,21 +147,15 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { // TODO: Add large tests. public void testWriteUnigrams() { final String dictVersion = Long.toString(System.currentTimeMillis()); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(TEST_LOCALE, dictVersion)); - final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + File trieFile = null; try { - encoder.writeDictionary(dict, FORMAT_OPTIONS); + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); } catch (IOException e) { - Log.e(TAG, "IOException while writing dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); + fail("IOException while writing an initial dictionary : " + e); } - final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertTrue(binaryDictionary.isValidDictionary()); final int probability = 100; binaryDictionary.addUnigramWord("aaa", probability); @@ -208,25 +173,18 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { public void testWriteBigrams() { final String dictVersion = Long.toString(System.currentTimeMillis()); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(TEST_LOCALE, dictVersion)); - final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + File trieFile = null; try { - encoder.writeDictionary(dict, FORMAT_OPTIONS); + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); } catch (IOException e) { - Log.e(TAG, "IOException while writing dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); + fail("IOException while writing an initial dictionary : " + e); } - final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertTrue(binaryDictionary.isValidDictionary()); final int unigramProbability = 100; final int bigramProbability = 10; - final int updatedBigramProbability = 15; binaryDictionary.addUnigramWord("aaa", unigramProbability); binaryDictionary.addUnigramWord("abb", unigramProbability); binaryDictionary.addUnigramWord("bcc", unigramProbability); @@ -249,21 +207,15 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase { public void testRemoveBigramWords() { final String dictVersion = Long.toString(System.currentTimeMillis()); - final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), - getDictionaryOptions(TEST_LOCALE, dictVersion)); - final DictEncoder encoder = new Ver4DictEncoder(getContext().getCacheDir()); + File trieFile = null; try { - encoder.writeDictionary(dict, FORMAT_OPTIONS); + trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion); } catch (IOException e) { - Log.e(TAG, "IOException while writing dictionary", e); - } catch (UnsupportedFormatException e) { - Log.e(TAG, "Unsupported format", e); + fail("IOException while writing an initial dictionary : " + e); } - final File trieFile = getTrieFile(TEST_LOCALE, dictVersion); final BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(), 0 /* offset */, trieFile.length(), true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */); - assertTrue(binaryDictionary.isValidDictionary()); final int unigramProbability = 100; final int bigramProbability = 10;