am 15c8b84a
: Merge "Implement Ver4PatriciaTriePolicy::flush()."
* commit '15c8b84af87441fdd10b239f0a565b0ce6ffb533': Implement Ver4PatriciaTriePolicy::flush().
This commit is contained in:
commit
9bc53e4fba
12 changed files with 237 additions and 17 deletions
|
@ -97,7 +97,8 @@ LATIN_IME_CORE_SRC_FILES := \
|
|||
ver4_patricia_trie_node_reader.cpp \
|
||||
ver4_patricia_trie_node_writer.cpp \
|
||||
ver4_patricia_trie_policy.cpp \
|
||||
ver4_patricia_trie_reading_utils.cpp ) \
|
||||
ver4_patricia_trie_reading_utils.cpp \
|
||||
ver4_patricia_trie_writing_helper.cpp) \
|
||||
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
||||
buffer_with_extendable_buffer.cpp \
|
||||
byte_array_utils.cpp \
|
||||
|
|
|
@ -17,13 +17,13 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
||||
|
@ -49,17 +49,15 @@ namespace latinime {
|
|||
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
|
||||
new DynamicPatriciaTriePolicy(mmappedBuffer));
|
||||
case FormatUtils::VERSION_4: {
|
||||
std::string dictDirPath(path);
|
||||
const std::string::size_type pos =
|
||||
dictDirPath.rfind(Ver4DictConstants::TRIE_FILE_EXTENSION);
|
||||
if (pos == std::string::npos) {
|
||||
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
|
||||
char dictDirPath[dictDirPathBufSize];
|
||||
if (!FileUtils::getFilePathWithoutSuffix(path, Ver4DictConstants::TRIE_FILE_EXTENSION,
|
||||
dictDirPathBufSize, dictDirPath)) {
|
||||
// Dictionary file name is not valid as a version 4 dictionary.
|
||||
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
|
||||
}
|
||||
// Removing extension to get the base path.
|
||||
dictDirPath.erase(pos);
|
||||
const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
|
||||
Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer);
|
||||
Ver4DictBuffers::openVer4DictBuffers(dictDirPath, mmappedBuffer);
|
||||
if (!dictBuffers.get()->isValid()) {
|
||||
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
|
||||
ASSERT(false);
|
||||
|
|
|
@ -67,8 +67,28 @@ class TerminalPositionLookupTable : public SingleDictContent {
|
|||
return mSize;
|
||||
}
|
||||
|
||||
bool flushToFile(const char *const dictDirPath) const {
|
||||
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
bool flushToFile(const char *const dictDirPath, const int newHeaderRegionSize) const {
|
||||
const int headerRegionSizeDiff = newHeaderRegionSize - mHeaderRegionSize;
|
||||
// If header region size has been changed, terminal PtNode positions have to be adjusted
|
||||
// depending on the new header region size.
|
||||
if (headerRegionSizeDiff != 0) {
|
||||
TerminalPositionLookupTable lookupTableToWrite;
|
||||
for (int i = 0; i < mSize; ++i) {
|
||||
const int terminalPtNodePosition = getTerminalPtNodePosition(i)
|
||||
+ headerRegionSizeDiff;
|
||||
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
|
||||
AKLOGE("Cannot set terminal position to lookupTableToWrite."
|
||||
" terminalId: %d, position: %d", i, terminalPtNodePosition);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return lookupTableToWrite.flush(dictDirPath,
|
||||
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
} else {
|
||||
// We can simply use this lookup table because the header region size has not been
|
||||
// changed.
|
||||
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -25,7 +25,8 @@
|
|||
|
||||
namespace latinime {
|
||||
|
||||
bool Ver4DictBuffers::flush(const char *const dictDirPath) const {
|
||||
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||
const BufferWithExtendableBuffer *const headerBuffer) const {
|
||||
// Create temporary directory.
|
||||
const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
|
||||
DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
|
||||
|
@ -38,8 +39,7 @@ bool Ver4DictBuffers::flush(const char *const dictDirPath) const {
|
|||
return false;
|
||||
}
|
||||
// Write trie file.
|
||||
const BufferWithExtendableBuffer *buffers[] =
|
||||
{&mExpandableHeaderBuffer, &mExpandableTrieBuffer};
|
||||
const BufferWithExtendableBuffer *buffers[] = {headerBuffer, &mExpandableTrieBuffer};
|
||||
if (!DictFileWritingUtils::flushBuffersToFileInDir(tmpDirPath,
|
||||
Ver4DictConstants::TRIE_FILE_EXTENSION, buffers, 2 /* bufferCount */)) {
|
||||
AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath,
|
||||
|
@ -47,7 +47,7 @@ bool Ver4DictBuffers::flush(const char *const dictDirPath) const {
|
|||
return false;
|
||||
}
|
||||
// Write dictionary contents.
|
||||
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath)) {
|
||||
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath, headerBuffer->getTailPosition())) {
|
||||
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -57,6 +57,10 @@ class Ver4DictBuffers {
|
|||
return &mExpandableTrieBuffer;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
|
||||
return &mExpandableTrieBuffer;
|
||||
}
|
||||
|
||||
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
|
||||
return &mTerminalPositionLookupTable;
|
||||
}
|
||||
|
@ -89,7 +93,12 @@ class Ver4DictBuffers {
|
|||
return mIsUpdatable;
|
||||
}
|
||||
|
||||
bool flush(const char *const dictDirPath) const;
|
||||
bool flush(const char *const dictDirPath) const {
|
||||
return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
|
||||
}
|
||||
|
||||
bool flushHeaderAndDictBuffers(const char *const dictDirPath,
|
||||
const BufferWithExtendableBuffer *const headerBuffer) const;
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);
|
||||
|
|
|
@ -216,7 +216,11 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int
|
|||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
||||
// TODO: Implement.
|
||||
if (!mBuffers.get()->isUpdatable()) {
|
||||
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
||||
return;
|
||||
}
|
||||
mWritingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -50,6 +51,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
&mShortcutPolicy),
|
||||
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter,
|
||||
mHeaderPolicy.isDecayingDict()),
|
||||
mWritingHelper(mBuffers.get()),
|
||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||
mBigramCount(mHeaderPolicy.getBigramCount()) {};
|
||||
|
||||
|
@ -120,6 +122,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
Ver4PatriciaTrieNodeReader mNodeReader;
|
||||
Ver4PatriciaTrieNodeWriter mNodeWriter;
|
||||
DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
|
||||
Ver4PatriciaTrieWritingHelper mWritingHelper;
|
||||
int mUnigramCount;
|
||||
int mBigramCount;
|
||||
};
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePath,
|
||||
const HeaderPolicy *const headerPolicy, const int unigramCount,
|
||||
const int bigramCount) const {
|
||||
const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */;
|
||||
char dirPath[dirPathBufSize];
|
||||
FileUtils::getDirPath(trieFilePath, dirPathBufSize, dirPath);
|
||||
BufferWithExtendableBuffer headerBuffer(
|
||||
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
|
||||
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
|
||||
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
|
||||
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
|
||||
false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
|
||||
AKLOGE("Cannot write header structure to buffer. updatesLastUpdatedTime: %d, "
|
||||
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
|
||||
"extendedRegionSize: %d", false, false, unigramCount, bigramCount,
|
||||
extendedRegionSize);
|
||||
return;
|
||||
}
|
||||
mBuffers->flushHeaderAndDictBuffers(dirPath, &headerBuffer);
|
||||
}
|
||||
|
||||
} // namespace latinime
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (C) 2013, The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class HeaderPolicy;
|
||||
class Ver4DictBuffers;
|
||||
|
||||
class Ver4PatriciaTrieWritingHelper {
|
||||
public:
|
||||
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
|
||||
: mBuffers(buffers) {}
|
||||
|
||||
void writeToDictFile(const char *const trieFilePath, const HeaderPolicy *const headerPolicy,
|
||||
const int unigramCount, const int bigramCount) const;
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
|
||||
|
||||
Ver4DictBuffers *const mBuffers;
|
||||
};
|
||||
} // namespace latinime
|
||||
|
||||
#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */
|
|
@ -88,4 +88,43 @@ namespace latinime {
|
|||
snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName);
|
||||
}
|
||||
|
||||
/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath,
|
||||
const char *const suffix, const int outDirPathBufSize, char *const outDirPath) {
|
||||
const int filePathLength = strlen(filePath);
|
||||
const int suffixLength = strlen(suffix);
|
||||
if (filePathLength <= suffixLength) {
|
||||
AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).",
|
||||
filePath, filePathLength, suffix, suffixLength);
|
||||
return false;
|
||||
}
|
||||
const int resultFilePathLength = filePathLength - suffixLength;
|
||||
if (outDirPathBufSize <= resultFilePathLength) {
|
||||
AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d",
|
||||
filePath, suffix, outDirPathBufSize);
|
||||
return false;
|
||||
}
|
||||
if (strncmp(filePath + resultFilePathLength, suffix, suffixLength) != 0) {
|
||||
AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix);
|
||||
return false;
|
||||
}
|
||||
snprintf(outDirPath, resultFilePathLength + 1 /* terminator */, "%s", filePath);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize,
|
||||
char *const outDirPath) {
|
||||
for (int i = strlen(filePath) - 1; i >= 0; --i) {
|
||||
if (filePath[i] == '/') {
|
||||
if (i >= outDirPathBufSize) {
|
||||
AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d",
|
||||
filePath, outDirPathBufSize);
|
||||
ASSERT(false);
|
||||
return;
|
||||
}
|
||||
snprintf(outDirPath, i + 1 /* terminator */, "%s", filePath);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace latinime
|
||||
|
|
|
@ -39,6 +39,13 @@ class FileUtils {
|
|||
static void getFilePath(const char *const dirPath, const char *const fileName,
|
||||
const int filePathBufSize, char *const outFilePath);
|
||||
|
||||
// Returns whether the filePath have the suffix.
|
||||
static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix,
|
||||
const int dirPathBufSize, char *const outDirPath);
|
||||
|
||||
static void getDirPath(const char *const filePath, const int dirPathBufSize,
|
||||
char *const outDirPath);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
|
||||
};
|
||||
|
|
|
@ -250,4 +250,51 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
|
|||
binaryDictionary.removeBigramWords("bcc", "aaa");
|
||||
}
|
||||
|
||||
public void testFlushDictionary() {
|
||||
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||
File trieFile = null;
|
||||
try {
|
||||
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
}
|
||||
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
final int probability = 100;
|
||||
binaryDictionary.addUnigramWord("aaa", probability);
|
||||
binaryDictionary.addUnigramWord("abcd", probability);
|
||||
// Close without flushing.
|
||||
binaryDictionary.close();
|
||||
|
||||
binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
|
||||
assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
|
||||
|
||||
binaryDictionary.addUnigramWord("aaa", probability);
|
||||
binaryDictionary.addUnigramWord("abcd", probability);
|
||||
binaryDictionary.flush();
|
||||
binaryDictionary.close();
|
||||
|
||||
binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
|
||||
assertEquals(probability, binaryDictionary.getFrequency("abcd"));
|
||||
binaryDictionary.addUnigramWord("bcde", probability);
|
||||
binaryDictionary.flush();
|
||||
binaryDictionary.close();
|
||||
|
||||
binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
|
||||
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
assertEquals(probability, binaryDictionary.getFrequency("bcde"));
|
||||
binaryDictionary.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue