am 15c8b84a: Merge "Implement Ver4PatriciaTriePolicy::flush()."

* commit '15c8b84af87441fdd10b239f0a565b0ce6ffb533':
  Implement Ver4PatriciaTriePolicy::flush().
This commit is contained in:
Keisuke Kuroyanagi 2013-11-19 21:38:50 -08:00 committed by Android Git Automerger
commit 9bc53e4fba
12 changed files with 237 additions and 17 deletions

View file

@ -97,7 +97,8 @@ LATIN_IME_CORE_SRC_FILES := \
ver4_patricia_trie_node_reader.cpp \
ver4_patricia_trie_node_writer.cpp \
ver4_patricia_trie_policy.cpp \
ver4_patricia_trie_reading_utils.cpp ) \
ver4_patricia_trie_reading_utils.cpp \
ver4_patricia_trie_writing_helper.cpp) \
$(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \

View file

@ -17,13 +17,13 @@
#include "suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.h"
#include <stdint.h>
#include <string>
#include "defines.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/v3/dynamic_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@ -49,17 +49,15 @@ namespace latinime {
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(
new DynamicPatriciaTriePolicy(mmappedBuffer));
case FormatUtils::VERSION_4: {
std::string dictDirPath(path);
const std::string::size_type pos =
dictDirPath.rfind(Ver4DictConstants::TRIE_FILE_EXTENSION);
if (pos == std::string::npos) {
const int dictDirPathBufSize = strlen(path) + 1 /* terminator */;
char dictDirPath[dictDirPathBufSize];
if (!FileUtils::getFilePathWithoutSuffix(path, Ver4DictConstants::TRIE_FILE_EXTENSION,
dictDirPathBufSize, dictDirPath)) {
// Dictionary file name is not valid as a version 4 dictionary.
return DictionaryStructureWithBufferPolicy::StructurePoilcyPtr(0);
}
// Removing extension to get the base path.
dictDirPath.erase(pos);
const Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
Ver4DictBuffers::openVer4DictBuffers(dictDirPath.c_str(), mmappedBuffer);
Ver4DictBuffers::openVer4DictBuffers(dictDirPath, mmappedBuffer);
if (!dictBuffers.get()->isValid()) {
AKLOGE("DICT: The dictionary doesn't satisfy ver4 format requirements.");
ASSERT(false);

View file

@ -67,8 +67,28 @@ class TerminalPositionLookupTable : public SingleDictContent {
return mSize;
}
bool flushToFile(const char *const dictDirPath) const {
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
bool flushToFile(const char *const dictDirPath, const int newHeaderRegionSize) const {
const int headerRegionSizeDiff = newHeaderRegionSize - mHeaderRegionSize;
// If header region size has been changed, terminal PtNode positions have to be adjusted
// depending on the new header region size.
if (headerRegionSizeDiff != 0) {
TerminalPositionLookupTable lookupTableToWrite;
for (int i = 0; i < mSize; ++i) {
const int terminalPtNodePosition = getTerminalPtNodePosition(i)
+ headerRegionSizeDiff;
if (!lookupTableToWrite.setTerminalPtNodePosition(i, terminalPtNodePosition)) {
AKLOGE("Cannot set terminal position to lookupTableToWrite."
" terminalId: %d, position: %d", i, terminalPtNodePosition);
return false;
}
}
return lookupTableToWrite.flush(dictDirPath,
Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
} else {
// We can simply use this lookup table because the header region size has not been
// changed.
return flush(dictDirPath, Ver4DictConstants::TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
}
}
private:

View file

@ -25,7 +25,8 @@
namespace latinime {
bool Ver4DictBuffers::flush(const char *const dictDirPath) const {
bool Ver4DictBuffers::flushHeaderAndDictBuffers(const char *const dictDirPath,
const BufferWithExtendableBuffer *const headerBuffer) const {
// Create temporary directory.
const int tmpDirPathBufSize = FileUtils::getFilePathWithSuffixBufSize(dictDirPath,
DictFileWritingUtils::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
@ -38,8 +39,7 @@ bool Ver4DictBuffers::flush(const char *const dictDirPath) const {
return false;
}
// Write trie file.
const BufferWithExtendableBuffer *buffers[] =
{&mExpandableHeaderBuffer, &mExpandableTrieBuffer};
const BufferWithExtendableBuffer *buffers[] = {headerBuffer, &mExpandableTrieBuffer};
if (!DictFileWritingUtils::flushBuffersToFileInDir(tmpDirPath,
Ver4DictConstants::TRIE_FILE_EXTENSION, buffers, 2 /* bufferCount */)) {
AKLOGE("Dictionary trie file %s/%s cannot be written.", tmpDirPath,
@ -47,7 +47,7 @@ bool Ver4DictBuffers::flush(const char *const dictDirPath) const {
return false;
}
// Write dictionary contents.
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath)) {
if (!mTerminalPositionLookupTable.flushToFile(tmpDirPath, headerBuffer->getTailPosition())) {
AKLOGE("Terminal position lookup table cannot be written. %s", tmpDirPath);
return false;
}

View file

@ -57,6 +57,10 @@ class Ver4DictBuffers {
return &mExpandableTrieBuffer;
}
AK_FORCE_INLINE const BufferWithExtendableBuffer *getTrieBuffer() const {
return &mExpandableTrieBuffer;
}
AK_FORCE_INLINE TerminalPositionLookupTable *getUpdatableTerminalPositionLookupTable() {
return &mTerminalPositionLookupTable;
}
@ -89,7 +93,12 @@ class Ver4DictBuffers {
return mIsUpdatable;
}
bool flush(const char *const dictDirPath) const;
bool flush(const char *const dictDirPath) const {
return flushHeaderAndDictBuffers(dictDirPath, &mExpandableHeaderBuffer);
}
bool flushHeaderAndDictBuffers(const char *const dictDirPath,
const BufferWithExtendableBuffer *const headerBuffer) const;
private:
DISALLOW_COPY_AND_ASSIGN(Ver4DictBuffers);

View file

@ -216,7 +216,11 @@ bool Ver4PatriciaTriePolicy::removeBigramWords(const int *const word0, const int
}
void Ver4PatriciaTriePolicy::flush(const char *const filePath) {
// TODO: Implement.
if (!mBuffers.get()->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
return;
}
mWritingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
}
void Ver4PatriciaTriePolicy::flushWithGC(const char *const filePath) {

View file

@ -26,6 +26,7 @@
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_node_writer.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
namespace latinime {
@ -50,6 +51,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
&mShortcutPolicy),
mUpdatingHelper(mDictBuffer, &mNodeReader, &mNodeWriter,
mHeaderPolicy.isDecayingDict()),
mWritingHelper(mBuffers.get()),
mUnigramCount(mHeaderPolicy.getUnigramCount()),
mBigramCount(mHeaderPolicy.getBigramCount()) {};
@ -120,6 +122,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
Ver4PatriciaTrieNodeReader mNodeReader;
Ver4PatriciaTrieNodeWriter mNodeWriter;
DynamicPatriciaTrieUpdatingHelper mUpdatingHelper;
Ver4PatriciaTrieWritingHelper mWritingHelper;
int mUnigramCount;
int mBigramCount;
};

View file

@ -0,0 +1,50 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_writing_helper.h"
#include <cstring>
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
namespace latinime {
void Ver4PatriciaTrieWritingHelper::writeToDictFile(const char *const trieFilePath,
const HeaderPolicy *const headerPolicy, const int unigramCount,
const int bigramCount) const {
const int dirPathBufSize = strlen(trieFilePath) + 1 /* terminator */;
char dirPath[dirPathBufSize];
FileUtils::getDirPath(trieFilePath, dirPathBufSize, dirPath);
BufferWithExtendableBuffer headerBuffer(
BufferWithExtendableBuffer::DEFAULT_MAX_ADDITIONAL_BUFFER_SIZE);
const int extendedRegionSize = headerPolicy->getExtendedRegionSize()
+ mBuffers->getTrieBuffer()->getUsedAdditionalBufferSize();
if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
false /* updatesLastDecayedTime */, unigramCount, bigramCount, extendedRegionSize)) {
AKLOGE("Cannot write header structure to buffer. updatesLastUpdatedTime: %d, "
"updatesLastDecayedTime: %d, unigramCount: %d, bigramCount: %d, "
"extendedRegionSize: %d", false, false, unigramCount, bigramCount,
extendedRegionSize);
return;
}
mBuffers->flushHeaderAndDictBuffers(dirPath, &headerBuffer);
}
} // namespace latinime

View file

@ -0,0 +1,42 @@
/*
* Copyright (C) 2013, The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
#define LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H
#include "defines.h"
namespace latinime {
class HeaderPolicy;
class Ver4DictBuffers;
class Ver4PatriciaTrieWritingHelper {
public:
Ver4PatriciaTrieWritingHelper(Ver4DictBuffers *const buffers)
: mBuffers(buffers) {}
void writeToDictFile(const char *const trieFilePath, const HeaderPolicy *const headerPolicy,
const int unigramCount, const int bigramCount) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Ver4PatriciaTrieWritingHelper);
Ver4DictBuffers *const mBuffers;
};
} // namespace latinime
#endif /* LATINIME_VER4_PATRICIA_TRIE_WRITING_HELPER_H */

View file

@ -88,4 +88,43 @@ namespace latinime {
snprintf(outFilePath, filePathBufSize, "%s/%s", dirPath, fileName);
}
/* static */ bool FileUtils::getFilePathWithoutSuffix(const char *const filePath,
const char *const suffix, const int outDirPathBufSize, char *const outDirPath) {
const int filePathLength = strlen(filePath);
const int suffixLength = strlen(suffix);
if (filePathLength <= suffixLength) {
AKLOGE("File path length (%s:%d) is shorter that suffix length (%s:%d).",
filePath, filePathLength, suffix, suffixLength);
return false;
}
const int resultFilePathLength = filePathLength - suffixLength;
if (outDirPathBufSize <= resultFilePathLength) {
AKLOGE("outDirPathBufSize is too small. filePath: %s, suffix: %s, outDirPathBufSize: %d",
filePath, suffix, outDirPathBufSize);
return false;
}
if (strncmp(filePath + resultFilePathLength, suffix, suffixLength) != 0) {
AKLOGE("File Path %s does not have %s as a suffix", filePath, suffix);
return false;
}
snprintf(outDirPath, resultFilePathLength + 1 /* terminator */, "%s", filePath);
return true;
}
/* static */ void FileUtils::getDirPath(const char *const filePath, const int outDirPathBufSize,
char *const outDirPath) {
for (int i = strlen(filePath) - 1; i >= 0; --i) {
if (filePath[i] == '/') {
if (i >= outDirPathBufSize) {
AKLOGE("outDirPathBufSize is too small. filePath: %s, outDirPathBufSize: %d",
filePath, outDirPathBufSize);
ASSERT(false);
return;
}
snprintf(outDirPath, i + 1 /* terminator */, "%s", filePath);
return;
}
}
}
} // namespace latinime

View file

@ -39,6 +39,13 @@ class FileUtils {
static void getFilePath(const char *const dirPath, const char *const fileName,
const int filePathBufSize, char *const outFilePath);
// Returns whether the filePath have the suffix.
static bool getFilePathWithoutSuffix(const char *const filePath, const char *const suffix,
const int dirPathBufSize, char *const outDirPath);
static void getDirPath(const char *const filePath, const int dirPathBufSize,
char *const outDirPath);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(FileUtils);
};

View file

@ -250,4 +250,51 @@ public class Ver4BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.removeBigramWords("bcc", "aaa");
}
public void testFlushDictionary() {
final String dictVersion = Long.toString(System.currentTimeMillis());
File trieFile = null;
try {
trieFile = createEmptyDictionaryAndGetTrieFile(dictVersion);
} catch (IOException e) {
fail("IOException while writing an initial dictionary : " + e);
}
BinaryDictionary binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
final int probability = 100;
binaryDictionary.addUnigramWord("aaa", probability);
binaryDictionary.addUnigramWord("abcd", probability);
// Close without flushing.
binaryDictionary.close();
binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("aaa"));
assertEquals(Dictionary.NOT_A_PROBABILITY, binaryDictionary.getFrequency("abcd"));
binaryDictionary.addUnigramWord("aaa", probability);
binaryDictionary.addUnigramWord("abcd", probability);
binaryDictionary.flush();
binaryDictionary.close();
binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
assertEquals(probability, binaryDictionary.getFrequency("abcd"));
binaryDictionary.addUnigramWord("bcde", probability);
binaryDictionary.flush();
binaryDictionary.close();
binaryDictionary = new BinaryDictionary(trieFile.getAbsolutePath(),
0 /* offset */, trieFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
assertEquals(probability, binaryDictionary.getFrequency("bcde"));
binaryDictionary.close();
}
}