Merge "Implement dictionary flush."
This commit is contained in:
commit
72e53a1e38
4 changed files with 99 additions and 2 deletions
|
@ -248,7 +248,9 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
|
|||
AKLOGI("Warning: flush() is called for non-updatable dictionary.");
|
||||
return;
|
||||
}
|
||||
// TODO: Implement.
|
||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||
&mBigramListPolicy, &mShortcutListPolicy);
|
||||
writingHelper.writeToDictFile(filePath, mBuffer->getBuffer(), mHeaderPolicy.getSize());
|
||||
}
|
||||
|
||||
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
|
||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h"
|
||||
|
@ -27,6 +30,8 @@
|
|||
namespace latinime {
|
||||
|
||||
const int DynamicPatriciaTrieWritingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||
// Suffix appended to the dictionary file name to build the name of the temporary file that
// writeToDictFile() writes before renaming it to the final file name.
const char *const DynamicPatriciaTrieWritingHelper::TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE =
|
||||
".tmp";
|
||||
|
||||
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||
DynamicPatriciaTrieReadingHelper *const readingHelper,
|
||||
|
@ -131,6 +136,41 @@ bool DynamicPatriciaTrieWritingHelper::removeBigramWords(const int word0Pos, con
|
|||
return mBigramPolicy->removeBigram(nodeReader.getBigramsPos(), word1Pos);
|
||||
}
|
||||
|
||||
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
|
||||
const uint8_t *const headerBuf, const int headerSize) {
|
||||
const int tmpFileNameBufSize = strlen(fileName)
|
||||
+ strlen(TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE) + 1;
|
||||
char tmpFileName[tmpFileNameBufSize];
|
||||
snprintf(tmpFileName, tmpFileNameBufSize, "%s%s", fileName,
|
||||
TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE);
|
||||
FILE *const file = fopen(tmpFileName, "wb");
|
||||
if (!file) {
|
||||
return;
|
||||
}
|
||||
// Write header.
|
||||
if (fwrite(headerBuf, headerSize, 1, file) < 1) {
|
||||
fclose(file);
|
||||
remove(tmpFileName);
|
||||
return;
|
||||
}
|
||||
// Write data in original buffer.
|
||||
if (fwrite(mBuffer->getBuffer(false /* usesAdditionalBuffer */),
|
||||
mBuffer->getOriginalBufferSize(), 1, file) < 1) {
|
||||
fclose(file);
|
||||
remove(tmpFileName);
|
||||
return;
|
||||
}
|
||||
// Write data in additional buffer.
|
||||
if (fwrite(mBuffer->getBuffer(true /* usesAdditionalBuffer */),
|
||||
mBuffer->getTailPosition() - mBuffer->getOriginalBufferSize(), 1, file) < 1) {
|
||||
fclose(file);
|
||||
remove(tmpFileName);
|
||||
return;
|
||||
}
|
||||
fclose(file);
|
||||
rename(tmpFileName, fileName);
|
||||
}
|
||||
|
||||
bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
|
||||
const DynamicPatriciaTrieNodeReader *const originalNode, const int movedPos,
|
||||
const int bigramLinkedNodePos) {
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#ifndef LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
#define LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "defines.h"
|
||||
|
||||
namespace latinime {
|
||||
|
@ -46,10 +48,14 @@ class DynamicPatriciaTrieWritingHelper {
|
|||
// Remove a bigram relation from word0Pos to word1Pos.
|
||||
bool removeBigramWords(const int word0Pos, const int word1Pos);
|
||||
|
||||
// Writes the header (headerSize bytes starting at headerBuf) followed by the dictionary data
// held in mBuffer to a temporary file, then renames the temporary file to fileName.
void writeToDictFile(const char *const fileName, const uint8_t *const headerBuf,
|
||||
const int headerSize);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||
|
||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||
static const char *const TEMP_FILE_SUFFIX_FOR_WRITING_DICT_FILE;
|
||||
|
||||
BufferWithExtendableBuffer *const mBuffer;
|
||||
DynamicBigramListPolicy *const mBigramPolicy;
|
||||
|
|
|
@ -299,7 +299,6 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
final int unigramProbability = 100;
|
||||
final int bigramProbability = 10;
|
||||
binaryDictionary.addUnigramWord("aaa", unigramProbability);
|
||||
|
@ -335,4 +334,54 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
|
||||
dictFile.delete();
|
||||
}
|
||||
|
||||
public void testFlushDictionary() {
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
fail("UnsupportedFormatException while writing an initial dictionary : " + e);
|
||||
}
|
||||
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
final int probability = 100;
|
||||
binaryDictionary.addUnigramWord("aaa", probability);
|
||||
binaryDictionary.addUnigramWord("abcd", probability);
|
||||
// Close without flushing.
|
||||
binaryDictionary.close();
|
||||
|
||||
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
assertEquals(-1, binaryDictionary.getFrequency("aaa"));
|
||||
assertEquals(-1, binaryDictionary.getFrequency("abcd"));
|
||||
|
||||
binaryDictionary.addUnigramWord("aaa", probability);
|
||||
binaryDictionary.addUnigramWord("abcd", probability);
|
||||
binaryDictionary.flush();
|
||||
binaryDictionary.close();
|
||||
|
||||
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
|
||||
assertEquals(probability, binaryDictionary.getFrequency("aaa"));
|
||||
assertEquals(probability, binaryDictionary.getFrequency("abcd"));
|
||||
binaryDictionary.addUnigramWord("bcde", probability);
|
||||
binaryDictionary.flush();
|
||||
binaryDictionary.close();
|
||||
|
||||
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||
assertEquals(probability, binaryDictionary.getFrequency("bcde"));
|
||||
binaryDictionary.close();
|
||||
|
||||
dictFile.delete();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue