am 89939b87
: Support creating BinaryDictionary without creating dict file.
* commit '89939b870876b36791a47470ce852e4fcdfe86f2': Support creating BinaryDictionary without creating dict file.
This commit is contained in:
commit
42732ab9eb
5 changed files with 94 additions and 3 deletions
|
@ -41,6 +41,7 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Implements a static, compacted, binary dictionary of standard words.
|
||||
|
@ -123,8 +124,7 @@ public final class BinaryDictionary extends Dictionary {
|
|||
}
|
||||
|
||||
/**
|
||||
* Constructor for the binary dictionary. This is supposed to be called from the
|
||||
* dictionary factory.
|
||||
* Constructs binary dictionary using existing dictionary file.
|
||||
* @param filename the name of the file to read through native code.
|
||||
* @param offset the offset of the dictionary data within the file.
|
||||
* @param length the length of the binary data.
|
||||
|
@ -145,6 +145,38 @@ public final class BinaryDictionary extends Dictionary {
|
|||
loadDictionary(filename, offset, length, isUpdatable);
|
||||
}
|
||||
|
||||
/**
 * Constructs binary dictionary on memory.
 *
 * No existing dictionary file is loaded by this constructor: the size is initialized to 0 and
 * the native dictionary is obtained from {@code openOnMemoryNative}. The dictionary is always
 * marked as updatable.
 *
 * @param filename the name of the file used to flush.
 * @param useFullEditDistance whether to use the full edit distance in suggestions
 * @param locale the locale of the dictionary; its string form is passed to the native code
 * @param dictType the dictionary type, as a human-readable string
 * @param formatVersion the format version of the dictionary
 * @param attributeMap the attributes of the dictionary
 */
@UsedForTesting
public BinaryDictionary(final String filename, final boolean useFullEditDistance,
        final Locale locale, final String dictType, final long formatVersion,
        final Map<String, String> attributeMap) {
    super(dictType);
    mLocale = locale;
    mDictSize = 0;
    mDictFilePath = filename;
    // On memory dictionary is always updatable.
    mIsUpdatable = true;
    mHasUpdated = false;
    mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
    // Flatten the attribute map into two parallel arrays (key i pairs with value i) so that it
    // can be handed to the native method. Iterating over the entries reads each key and its
    // value in one pass instead of doing an extra lookup per key.
    final int attributeCount = attributeMap.size();
    final String[] keyArray = new String[attributeCount];
    final String[] valueArray = new String[attributeCount];
    int index = 0;
    for (final Map.Entry<String, String> attribute : attributeMap.entrySet()) {
        keyArray[index] = attribute.getKey();
        valueArray[index] = attribute.getValue();
        index++;
    }
    mNativeDict = openOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
}
|
||||
|
||||
|
||||
// Static initializer: calls JniUtils.loadNativeLibrary() once, when this class is initialized.
static {
|
||||
JniUtils.loadNativeLibrary();
|
||||
}
|
||||
|
|
|
@ -99,6 +99,27 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
|||
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
|
||||
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
|
||||
|
||||
// Copy header information
|
||||
HeaderPolicy(const HeaderPolicy *const headerPolicy)
|
||||
: mDictFormatVersion(headerPolicy->mDictFormatVersion),
|
||||
mDictionaryFlags(headerPolicy->mDictionaryFlags), mSize(headerPolicy->mSize),
|
||||
mAttributeMap(headerPolicy->mAttributeMap), mLocale(headerPolicy->mLocale),
|
||||
mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier),
|
||||
mRequiresGermanUmlautProcessing(headerPolicy->mRequiresGermanUmlautProcessing),
|
||||
mIsDecayingDict(headerPolicy->mIsDecayingDict),
|
||||
mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
|
||||
mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
|
||||
mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
|
||||
mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
|
||||
mForgettingCurveOccurrencesToLevelUp(
|
||||
headerPolicy->mForgettingCurveOccurrencesToLevelUp),
|
||||
mForgettingCurveProbabilityValuesTableId(
|
||||
headerPolicy->mForgettingCurveProbabilityValuesTableId),
|
||||
mForgettingCurveDurationToLevelDown(
|
||||
headerPolicy->mForgettingCurveDurationToLevelDown),
|
||||
mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
|
||||
mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
|
||||
|
||||
// Temporary dummy header.
|
||||
HeaderPolicy()
|
||||
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
|
||||
|
|
|
@ -19,10 +19,12 @@
|
|||
#include <climits>
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||
|
@ -56,6 +58,11 @@ namespace latinime {
|
|||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
|
||||
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
|
||||
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
|
||||
if (!DynamicPtWritingUtils::writeEmptyDictionary(
|
||||
dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
|
||||
AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
|
||||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
|
||||
}
|
||||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
|
||||
new Ver4PatriciaTriePolicy(std::move(dictBuffers)));
|
||||
}
|
||||
|
|
|
@ -131,7 +131,7 @@ Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
|
|||
mIsUpdatable(isUpdatable) {}
|
||||
|
||||
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
||||
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(),
|
||||
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
|
||||
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
|
||||
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
|
||||
|
|
|
@ -97,6 +97,37 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
binaryDictionary.close();
|
||||
}
|
||||
|
||||
// Test entry point: runs the on-memory construction test with FormatSpec.VERSION4 as the
// format version, by delegating to the overload that takes the format version.
public void testConstructingDictionaryOnMemory() {
|
||||
testConstructingDictionaryOnMemory(FormatSpec.VERSION4);
|
||||
}
|
||||
|
||||
private void testConstructingDictionaryOnMemory(final int formatVersion) {
|
||||
File dictFile = null;
|
||||
try {
|
||||
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||
} catch (IOException e) {
|
||||
fail("IOException while writing an initial dictionary : " + e);
|
||||
}
|
||||
FileUtils.deleteRecursively(dictFile);
|
||||
assertFalse(dictFile.exists());
|
||||
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||
true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
|
||||
new HashMap<String, String>());
|
||||
assertTrue(binaryDictionary.isValidDictionary());
|
||||
assertEquals(formatVersion, binaryDictionary.getFormatVersion());
|
||||
final int probability = 100;
|
||||
addUnigramWord(binaryDictionary, "word", probability);
|
||||
assertEquals(probability, binaryDictionary.getFrequency("word"));
|
||||
assertFalse(dictFile.exists());
|
||||
binaryDictionary.flush();
|
||||
assertTrue(dictFile.exists());
|
||||
assertTrue(binaryDictionary.isValidDictionary());
|
||||
assertEquals(formatVersion, binaryDictionary.getFormatVersion());
|
||||
assertEquals(probability, binaryDictionary.getFrequency("word"));
|
||||
binaryDictionary.close();
|
||||
dictFile.delete();
|
||||
}
|
||||
|
||||
// Test entry point: runs the too-long-word test with FormatSpec.VERSION4 as the format
// version, by delegating to the overload that takes the format version.
public void testAddTooLongWord() {
|
||||
testAddTooLongWord(FormatSpec.VERSION4);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue