am 89939b87: Support creating BinaryDictionary without creating dict file.
* commit '89939b870876b36791a47470ce852e4fcdfe86f2': Support creating BinaryDictionary without creating dict file.
This commit is contained in:
commit
42732ab9eb
5 changed files with 94 additions and 3 deletions
|
@ -41,6 +41,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Implements a static, compacted, binary dictionary of standard words.
|
* Implements a static, compacted, binary dictionary of standard words.
|
||||||
|
@ -123,8 +124,7 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor for the binary dictionary. This is supposed to be called from the
|
* Constructs binary dictionary using existing dictionary file.
|
||||||
* dictionary factory.
|
|
||||||
* @param filename the name of the file to read through native code.
|
* @param filename the name of the file to read through native code.
|
||||||
* @param offset the offset of the dictionary data within the file.
|
* @param offset the offset of the dictionary data within the file.
|
||||||
* @param length the length of the binary data.
|
* @param length the length of the binary data.
|
||||||
|
@ -145,6 +145,38 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
loadDictionary(filename, offset, length, isUpdatable);
|
loadDictionary(filename, offset, length, isUpdatable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs binary dictionary on memory.
|
||||||
|
* @param filename the name of the file used to flush.
|
||||||
|
* @param useFullEditDistance whether to use the full edit distance in suggestions
|
||||||
|
* @param dictType the dictionary type, as a human-readable string
|
||||||
|
* @param formatVersion the format version of the dictionary
|
||||||
|
* @param attributeMap the attributes of the dictionary
|
||||||
|
*/
|
||||||
|
@UsedForTesting
|
||||||
|
public BinaryDictionary(final String filename, final boolean useFullEditDistance,
|
||||||
|
final Locale locale, final String dictType, final long formatVersion,
|
||||||
|
final Map<String, String> attributeMap) {
|
||||||
|
super(dictType);
|
||||||
|
mLocale = locale;
|
||||||
|
mDictSize = 0;
|
||||||
|
mDictFilePath = filename;
|
||||||
|
// On memory dictionary is always updatable.
|
||||||
|
mIsUpdatable = true;
|
||||||
|
mHasUpdated = false;
|
||||||
|
mNativeSuggestOptions.setUseFullEditDistance(useFullEditDistance);
|
||||||
|
final String[] keyArray = new String[attributeMap.size()];
|
||||||
|
final String[] valueArray = new String[attributeMap.size()];
|
||||||
|
int index = 0;
|
||||||
|
for (final String key : attributeMap.keySet()) {
|
||||||
|
keyArray[index] = key;
|
||||||
|
valueArray[index] = attributeMap.get(key);
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
mNativeDict = openOnMemoryNative(formatVersion, locale.toString(), keyArray, valueArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
JniUtils.loadNativeLibrary();
|
JniUtils.loadNativeLibrary();
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,6 +99,27 @@ class HeaderPolicy : public DictionaryHeaderStructurePolicy {
|
||||||
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
|
mMaxBigramCount(HeaderReadWriteUtils::readIntAttributeValue(
|
||||||
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
|
&mAttributeMap, MAX_BIGRAM_COUNT_KEY, DEFAULT_MAX_BIGRAM_COUNT)) {}
|
||||||
|
|
||||||
|
// Copy header information
|
||||||
|
HeaderPolicy(const HeaderPolicy *const headerPolicy)
|
||||||
|
: mDictFormatVersion(headerPolicy->mDictFormatVersion),
|
||||||
|
mDictionaryFlags(headerPolicy->mDictionaryFlags), mSize(headerPolicy->mSize),
|
||||||
|
mAttributeMap(headerPolicy->mAttributeMap), mLocale(headerPolicy->mLocale),
|
||||||
|
mMultiWordCostMultiplier(headerPolicy->mMultiWordCostMultiplier),
|
||||||
|
mRequiresGermanUmlautProcessing(headerPolicy->mRequiresGermanUmlautProcessing),
|
||||||
|
mIsDecayingDict(headerPolicy->mIsDecayingDict),
|
||||||
|
mDate(headerPolicy->mDate), mLastDecayedTime(headerPolicy->mLastDecayedTime),
|
||||||
|
mUnigramCount(headerPolicy->mUnigramCount), mBigramCount(headerPolicy->mBigramCount),
|
||||||
|
mExtendedRegionSize(headerPolicy->mExtendedRegionSize),
|
||||||
|
mHasHistoricalInfoOfWords(headerPolicy->mHasHistoricalInfoOfWords),
|
||||||
|
mForgettingCurveOccurrencesToLevelUp(
|
||||||
|
headerPolicy->mForgettingCurveOccurrencesToLevelUp),
|
||||||
|
mForgettingCurveProbabilityValuesTableId(
|
||||||
|
headerPolicy->mForgettingCurveProbabilityValuesTableId),
|
||||||
|
mForgettingCurveDurationToLevelDown(
|
||||||
|
headerPolicy->mForgettingCurveDurationToLevelDown),
|
||||||
|
mMaxUnigramCount(headerPolicy->mMaxUnigramCount),
|
||||||
|
mMaxBigramCount(headerPolicy->mMaxBigramCount) {}
|
||||||
|
|
||||||
// Temporary dummy header.
|
// Temporary dummy header.
|
||||||
HeaderPolicy()
|
HeaderPolicy()
|
||||||
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
|
: mDictFormatVersion(FormatUtils::UNKNOWN_VERSION), mDictionaryFlags(0), mSize(0),
|
||||||
|
|
|
@ -19,10 +19,12 @@
|
||||||
#include <climits>
|
#include <climits>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/structure/pt_common/dynamic_pt_writing_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_buffers.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_dict_constants.h"
|
||||||
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
#include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/file_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/format_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/mmapped_buffer.h"
|
||||||
|
@ -56,6 +58,11 @@ namespace latinime {
|
||||||
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
|
Ver4DictBuffers::Ver4DictBuffersPtr dictBuffers =
|
||||||
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
|
Ver4DictBuffers::createVer4DictBuffers(&headerPolicy,
|
||||||
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
|
Ver4DictConstants::MAX_DICT_EXTENDED_REGION_SIZE);
|
||||||
|
if (!DynamicPtWritingUtils::writeEmptyDictionary(
|
||||||
|
dictBuffers->getWritableTrieBuffer(), 0 /* rootPos */)) {
|
||||||
|
AKLOGE("Empty ver4 dictionary structure cannot be created on memory.");
|
||||||
|
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(nullptr);
|
||||||
|
}
|
||||||
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
|
return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
|
||||||
new Ver4PatriciaTriePolicy(std::move(dictBuffers)));
|
new Ver4PatriciaTriePolicy(std::move(dictBuffers)));
|
||||||
}
|
}
|
||||||
|
|
|
@ -131,7 +131,7 @@ Ver4DictBuffers::Ver4DictBuffers(const char *const dictPath,
|
||||||
mIsUpdatable(isUpdatable) {}
|
mIsUpdatable(isUpdatable) {}
|
||||||
|
|
||||||
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
Ver4DictBuffers::Ver4DictBuffers(const HeaderPolicy *const headerPolicy, const int maxTrieSize)
|
||||||
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(),
|
: mHeaderBuffer(nullptr), mDictBuffer(nullptr), mHeaderPolicy(headerPolicy),
|
||||||
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
mExpandableHeaderBuffer(Ver4DictConstants::MAX_DICTIONARY_SIZE),
|
||||||
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
|
mExpandableTrieBuffer(maxTrieSize), mTerminalPositionLookupTable(),
|
||||||
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
|
mProbabilityDictContent(headerPolicy->hasHistoricalInfoOfWords()),
|
||||||
|
|
|
@ -97,6 +97,37 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
binaryDictionary.close();
|
binaryDictionary.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testConstructingDictionaryOnMemory() {
|
||||||
|
testConstructingDictionaryOnMemory(FormatSpec.VERSION4);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testConstructingDictionaryOnMemory(final int formatVersion) {
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary", formatVersion);
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
FileUtils.deleteRecursively(dictFile);
|
||||||
|
assertFalse(dictFile.exists());
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
true /* useFullEditDistance */, Locale.getDefault(), TEST_LOCALE, formatVersion,
|
||||||
|
new HashMap<String, String>());
|
||||||
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
assertEquals(formatVersion, binaryDictionary.getFormatVersion());
|
||||||
|
final int probability = 100;
|
||||||
|
addUnigramWord(binaryDictionary, "word", probability);
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("word"));
|
||||||
|
assertFalse(dictFile.exists());
|
||||||
|
binaryDictionary.flush();
|
||||||
|
assertTrue(dictFile.exists());
|
||||||
|
assertTrue(binaryDictionary.isValidDictionary());
|
||||||
|
assertEquals(formatVersion, binaryDictionary.getFormatVersion());
|
||||||
|
assertEquals(probability, binaryDictionary.getFrequency("word"));
|
||||||
|
binaryDictionary.close();
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
|
|
||||||
public void testAddTooLongWord() {
|
public void testAddTooLongWord() {
|
||||||
testAddTooLongWord(FormatSpec.VERSION4);
|
testAddTooLongWord(FormatSpec.VERSION4);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue