Support creating BinaryDictionary without creating dict file.

Bug: 14166482
Change-Id: Ic8c78ec2c8f39358a1f085a041b608972a380eef
This commit is contained in:
Keisuke Kuroyanagi 2014-04-22 13:18:34 -07:00
parent ce76821dce
commit 3b7984752c
5 changed files with 94 additions and 3 deletions

View file

@ -41,6 +41,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import java.util.Map;
/** /**
* Implements a static, compacted, binary dictionary of standard words. * Implements a static, compacted, binary dictionary of standard words.
@ -123,8 +124,7 @@ public final class BinaryDictionary extends Dictionary {
} }
/** /**
* Constructor for the binary dictionary. This is supposed to be called from the * Constructs binary dictionary using existing dictionary file.
* dictionary factory.
* @param filename the name of the file to read through native code. * @param filename the name of the file to read through native code.
* @param offset the offset of the dictionary data within the file. * @param offset the offset of the dictionary data within the file.
* @param length the length of the binary data. * @param length the length of the binary data.
@ -145,6 +145,38 @@ public final class BinaryDictionary extends Dictionary {
loadDictionary(filename, offset, length, isUpdatable); loadDictionary(filename, offset, length, isUpdatable);
} }
/**
 * Constructs a binary dictionary in memory rather than loading it from an existing file.
 *
 * Unlike the file-based constructor, this one does not call {@code loadDictionary}; the native
 * dictionary handle is obtained from {@code createOnMemoryNative}. The dictionary size is
 * initialized to 0 and the instance is always marked updatable.
 *
 * @param filename the name of the file used to flush.
 * @param useFullEditDistance whether to use the full edit distance in suggestions
 * @param locale the locale of the dictionary; its string form is passed to native code
 * @param dictType the dictionary type, as a human-readable string
 * @param formatVersion the format version of the dictionary
 * @param attributeMap the attributes of the dictionary; must not be null
 */
@UsedForTesting
public BinaryDictionary(final String filename, final boolean useFullEditDistance,
        final Locale locale, final String dictType, final long formatVersion,
        final Map<String, String> attributeMap) {
    super(dictType);
    mLocale = locale;
    mDictSize = 0;
    mDictFilePath = filename;
    // On memory dictionary is always updatable.
    mIsUpdatable = true;
    mHasUpdated = false;
    mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
    // The native side takes the attributes as two parallel arrays: keyArray[i] pairs with
    // valueArray[i].
    final int attributeCount = attributeMap.size();
    final String[] keyArray = new String[attributeCount];
    final String[] valueArray = new String[attributeCount];
    int index = 0;
    // Walk the entries directly so each attribute is read once instead of looking the value
    // up again for every key.
    for (final Map.Entry<String, String> attribute : attributeMap.entrySet()) {
        keyArray[index] = attribute.getKey();
        valueArray[index] = attribute.getValue();
        index++;
    }
    mNativeDict = createOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
}
static { static {
JniUtils.loadNativeLibrary(); JniUtils.loadNativeLibrary();
} }

View file

@ -99,6 +99,27 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue( mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {} &mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
// Copy header information.
// Builds a new HeaderPolicy by copying each header field, one by one, from the instance that
// headerPolicy points to. Nothing is re-read or re-parsed here; every member is taken from
// the source object as-is.
// headerPolicy is dereferenced unconditionally in the initializer list, so callers must pass
// a non-null pointer.
HeaderPolicy(const HeaderPolicy *const headerPolicy)
: mDictFormatVersion(headerPolicy->mDictFormatVersion),
mDictionaryFlags(headerPolicy->mDictionaryFlags), mSize(headerPolicy->mSize),
mAttributeMap(headerPolicy->mAttributeMap), mLocale(headerPolicy->mLocale),
mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier),
mRequiresGermanUmlautProcessing(headerPolicy->mRequiresGermanUmlautProcessing),
mIsDecayingDict(headerPolicy->mIsDecayingDict),
mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
mForgettingCurveOccurrencesToLevelUp(
headerPolicy->mForgettingCurveOccurrencesToLevelUp),
mForgettingCurveProbabilityValuesTableId(
headerPolicy->mForgettingCurveProbabilityValuesTableId),
mForgettingCurveDurationToLevelDown(
headerPolicy->mForgettingCurveDurationToLevelDown),
mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
// Temporary dummy header. // Temporary dummy header.
HeaderPolicy() HeaderPolicy()
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0), : mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),

View file

@ -19,10 +19,12 @@
#include <climits> #include <climits>
#include "defines.h" #include "defines.h"
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "suggest/policyimpl/dictionary/utils/file_utils.h" #include "suggest/policyimpl/dictionary/utils/file_utils.h"
#include "suggest/policyimpl/dictionary/utils/format_utils.h" #include "suggest/policyimpl/dictionary/utils/format_utils.h"
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h" #include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
@ -56,6 +58,11 @@ namespace latinime {
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers = Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy, Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE); Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
if (!DynamicPtWritingUtils::writeEmptyDictionary(
dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
}
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr( return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
new Ver4PatriciaTriePolicy(std::move(dictBuffers))); new Ver4PatriciaTriePolicy(std::move(dictBuffers)));
} }

View file

@ -131,7 +131,7 @@ Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
mIsUpdatable(isUpdatable) {} mIsUpdatable(isUpdatable) {}
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize) Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(), : mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE), mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(), mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()), mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),

View file

@ -97,6 +97,37 @@ public class BinaryDictionaryTests extends AndroidTestCase {
binaryDictionary.close(); binaryDictionary.close();
} }
/** Runs the on-memory construction test against the version 4 dictionary format. */
public void testConstructingDictionaryOnMemory() {
    final int formatVersion = FormatSpec.VERSION4;
    testConstructingDictionaryOnMemory(formatVersion);
}
/**
 * Checks that a dictionary constructed on memory is valid and usable before any file exists,
 * and that the file appears only after {@code flush()} with the added word still readable.
 *
 * @param formatVersion the dictionary format version to construct and verify.
 */
private void testConstructingDictionaryOnMemory(final int formatVersion) {
    final File dictFile;
    try {
        dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
    } catch (IOException e) {
        fail("IOException while writing an initial dictionary : " + e);
        return;
    }
    // Only the path is wanted: remove what was just created so the dictionary below has to be
    // built without an existing file.
    FileUtils.deleteRecursively(dictFile);
    assertFalse(dictFile.exists());
    final BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
            true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
            new HashMap<String, String>());
    try {
        assertTrue(binaryDictionary.isValidDictionary());
        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
        final int probability = 100;
        addUnigramWord(binaryDictionary, "word", probability);
        assertEquals(probability, binaryDictionary.getFrequency("word"));
        // Nothing must have been written to disk before the explicit flush.
        assertFalse(dictFile.exists());
        binaryDictionary.flush();
        assertTrue(dictFile.exists());
        assertTrue(binaryDictionary.isValidDictionary());
        assertEquals(formatVersion, binaryDictionary.getFormatVersion());
        assertEquals(probability, binaryDictionary.getFrequency("word"));
    } finally {
        // Clean up even when an assertion above fails, so a failing run does not leave the
        // dictionary open or the flushed file behind.
        try {
            binaryDictionary.close();
        } finally {
            dictFile.delete();
        }
    }
}
public void testAddTooLongWord() { public void testAddTooLongWord() {
testAddTooLongWord(FormatSpec.VERSION4); testAddTooLongWord(FormatSpec.VERSION4);
} }