* commit '12d3bd228dd40ad023c090d56861e7b80aabeee1': Implement simple dictionary decay.
This commit is contained in:
commit
7a66169855
13 changed files with 572 additions and 39 deletions
|
@ -22,6 +22,7 @@ import android.util.Log;
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.Constants;
|
import com.android.inputmethod.latin.Constants;
|
||||||
|
import com.android.inputmethod.latin.Dictionary;
|
||||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||||
import com.android.inputmethod.latin.LatinImeLogger;
|
import com.android.inputmethod.latin.LatinImeLogger;
|
||||||
import com.android.inputmethod.latin.makedict.DictDecoder;
|
import com.android.inputmethod.latin.makedict.DictDecoder;
|
||||||
|
@ -50,6 +51,9 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
||||||
/** Any pair being typed or picked */
|
/** Any pair being typed or picked */
|
||||||
public static final int FREQUENCY_FOR_TYPED = 2;
|
public static final int FREQUENCY_FOR_TYPED = 2;
|
||||||
|
|
||||||
|
public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
|
||||||
|
public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;
|
||||||
|
|
||||||
/** Locale for which this user history dictionary is storing words */
|
/** Locale for which this user history dictionary is storing words */
|
||||||
private final String mLocale;
|
private final String mLocale;
|
||||||
|
|
||||||
|
@ -131,14 +135,17 @@ public abstract class DecayingExpandableBinaryDictionaryBase extends ExpandableB
|
||||||
(word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
|
(word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
addWordDynamically(word1, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED,
|
final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
|
||||||
|
(isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
|
||||||
|
FREQUENCY_FOR_TYPED;
|
||||||
|
addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency,
|
||||||
false /* isNotAWord */);
|
false /* isNotAWord */);
|
||||||
// Do not insert a word as a bigram of itself
|
// Do not insert a word as a bigram of itself
|
||||||
if (word1.equals(word0)) {
|
if (word1.equals(word0)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (null != word0) {
|
if (null != word0) {
|
||||||
addBigramDynamically(word0, word1, FREQUENCY_FOR_TYPED, isValid);
|
addBigramDynamically(word0, word1, frequency, isValid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,6 +85,7 @@ LATIN_IME_CORE_SRC_FILES := \
|
||||||
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
$(addprefix suggest/policyimpl/dictionary/utils/, \
|
||||||
buffer_with_extendable_buffer.cpp \
|
buffer_with_extendable_buffer.cpp \
|
||||||
byte_array_utils.cpp \
|
byte_array_utils.cpp \
|
||||||
|
decaying_utils.cpp \
|
||||||
dict_file_writing_utils.cpp \
|
dict_file_writing_utils.cpp \
|
||||||
format_utils.cpp) \
|
format_utils.cpp) \
|
||||||
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
|
||||||
|
|
|
@ -17,10 +17,10 @@
|
||||||
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
|
||||||
|
|
||||||
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -41,9 +41,14 @@ void DynamicBigramListPolicy::getNextBigram(int *const outBigramPos, int *const
|
||||||
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
|
||||||
originalBigramPos += mBuffer->getOriginalBufferSize();
|
originalBigramPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
|
||||||
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
|
||||||
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
|
||||||
|
if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) {
|
||||||
|
// This bigram is too weak to output.
|
||||||
|
*outBigramPos = NOT_A_DICT_POS;
|
||||||
|
} else {
|
||||||
|
*outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
|
}
|
||||||
if (usesAdditionalBuffer) {
|
if (usesAdditionalBuffer) {
|
||||||
*bigramEntryPos += mBuffer->getOriginalBufferSize();
|
*bigramEntryPos += mBuffer->getOriginalBufferSize();
|
||||||
}
|
}
|
||||||
|
@ -153,15 +158,21 @@ bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
|
||||||
const int bigramTargetNodePos =
|
const int bigramTargetNodePos =
|
||||||
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
|
||||||
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
|
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
|
||||||
// TODO: Update probability for supporting probability decaying.
|
|
||||||
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|
||||||
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
|| bigramTargetNodePos == NOT_A_DICT_POS) {
|
||||||
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
// The target is no longer valid terminal. Invalidate the current bigram entry.
|
||||||
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
|
||||||
NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
|
NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
continue;
|
||||||
|
}
|
||||||
|
bool isRemoved = false;
|
||||||
|
if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
|
||||||
|
&isRemoved)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (!isRemoved) {
|
||||||
(*outValidBigramEntryCount) += 1;
|
(*outValidBigramEntryCount) += 1;
|
||||||
}
|
}
|
||||||
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
|
||||||
|
@ -247,8 +258,14 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
|
||||||
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
|
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
|
||||||
// Update this bigram entry.
|
// Update this bigram entry.
|
||||||
*outAddedNewBigram = false;
|
*outAddedNewBigram = false;
|
||||||
|
const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
|
||||||
|
bigramFlags);
|
||||||
|
const int probabilityToWrite = mIsDecayingDict ?
|
||||||
|
DecayingUtils::getUpdatedBigramProbabilityDelta(
|
||||||
|
originalProbability, probability) : probability;
|
||||||
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
const BigramListReadWriteUtils::BigramFlags updatedFlags =
|
||||||
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
|
BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
|
||||||
|
probabilityToWrite);
|
||||||
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
|
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
|
||||||
originalBigramPos, &entryPos);
|
originalBigramPos, &entryPos);
|
||||||
}
|
}
|
||||||
|
@ -276,8 +293,11 @@ bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTarg
|
||||||
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
|
||||||
int *const writingPos) {
|
int *const writingPos) {
|
||||||
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
|
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
|
||||||
|
const int probabilityToWrite = mIsDecayingDict ?
|
||||||
|
DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
|
||||||
|
probability;
|
||||||
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
|
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
|
||||||
probability, false /* hasNext */, writingPos);
|
probabilityToWrite, false /* hasNext */, writingPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
|
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
|
||||||
|
@ -339,4 +359,33 @@ int DynamicBigramListPolicy::followBigramLinkAndGetCurrentBigramPtNodePos(
|
||||||
return currentPos;
|
return currentPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Applies decay to a single bigram entry and rewrites it at *bigramEntryPos.
//
// bigramFlags:     flags of the entry being processed (they carry the current probability).
// targetPtNodePos: position of the bigram target PtNode, written back when the entry survives.
// bigramEntryPos:  in/out position handed to BigramListReadWriteUtils::writeBigramEntry().
// outRemoved:      set to true when the entry was invalidated, false otherwise.
//
// Returns false only when writing the entry fails. For a non-decaying dictionary nothing is
// written and the method returns true with *outRemoved == false.
bool DynamicBigramListPolicy::updateProbabilityForDecay(
        BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
        int *const bigramEntryPos, bool *const outRemoved) const {
    *outRemoved = false;
    if (!mIsDecayingDict) {
        // Nothing to decay.
        return true;
    }
    // Update bigram probability for decaying.
    const int decayedProbability = DecayingUtils::getBigramProbabilityDeltaToSave(
            BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
    if (!DecayingUtils::isValidBigram(decayedProbability)) {
        // Remove current bigram entry: keep the flags, but point it at NOT_A_DICT_POS.
        *outRemoved = true;
        if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
                NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) {
            return false;
        }
        return true;
    }
    // Write new probability.
    const BigramListReadWriteUtils::BigramFlags decayedFlags =
            BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, decayedProbability);
    if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, decayedFlags, targetPtNodePos,
            bigramEntryPos)) {
        return false;
    }
    return true;
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
@ -34,8 +35,9 @@ class DictionaryShortcutsStructurePolicy;
|
||||||
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
public:
|
public:
|
||||||
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
|
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
|
||||||
const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
|
const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
|
||||||
: mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
|
const bool isDecayingDict)
|
||||||
|
: mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}
|
||||||
|
|
||||||
~DynamicBigramListPolicy() {}
|
~DynamicBigramListPolicy() {}
|
||||||
|
|
||||||
|
@ -74,9 +76,13 @@ class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
|
||||||
|
const bool mIsDecayingDict;
|
||||||
|
|
||||||
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
|
||||||
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
|
||||||
|
|
||||||
|
bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
|
||||||
|
const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
|
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
bool DynamicPatriciaTrieGcEventListeners
|
bool DynamicPatriciaTrieGcEventListeners
|
||||||
|
@ -25,6 +27,19 @@ bool DynamicPatriciaTrieGcEventListeners
|
||||||
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
|
||||||
// children.
|
// children.
|
||||||
bool isUselessPtNode = !node->isTerminal();
|
bool isUselessPtNode = !node->isTerminal();
|
||||||
|
// For a terminal in a decaying dictionary: compute the unigram probability to save, write it
// into the node's probability field, and flag the node as useless when the new probability is
// no longer a valid unigram.
if (node->isTerminal() && mIsDecayingDict) {
    const int newProbability =
            DecayingUtils::getUnigramProbabilityToSave(node->getProbability());
    int writingPos = node->getProbabilityFieldPos();
    // Update probability.
    if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
            mBuffer, newProbability, &writingPos)) {
        return false;
    }
    if (!DecayingUtils::isValidUnigram(newProbability)) {
        // isUselessPtNode starts as !node->isTerminal(), i.e. false in this branch, so
        // assigning false here would be a no-op and decayed-out terminals would never be
        // treated as useless. Mark the node as useless instead; the check on mChildrenValue
        // that follows still resets the flag when the node has useful children.
        isUselessPtNode = true;
    }
}
|
||||||
if (mChildrenValue > 0) {
|
if (mChildrenValue > 0) {
|
||||||
isUselessPtNode = false;
|
isUselessPtNode = false;
|
||||||
} else if (node->isTerminal()) {
|
} else if (node->isTerminal()) {
|
||||||
|
|
|
@ -39,9 +39,9 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
public:
|
public:
|
||||||
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||||
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
DynamicPatriciaTrieWritingHelper *const writingHelper,
|
||||||
BufferWithExtendableBuffer *const buffer)
|
BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
|
||||||
: mWritingHelper(writingHelper), mBuffer(buffer), mValueStack(),
|
: mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict),
|
||||||
mChildrenValue(0), mValidUnigramCount(0) {}
|
mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {}
|
||||||
|
|
||||||
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
|
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
|
||||||
|
|
||||||
|
@ -74,6 +74,7 @@ class DynamicPatriciaTrieGcEventListeners {
|
||||||
|
|
||||||
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
|
// Whether the dictionary being garbage-collected is a decaying dictionary. Initialised from the
// constructor's `const bool isDecayingDict` argument and only used as a boolean condition.
const bool mIsDecayingDict;
|
||||||
std::vector<int> mValueStack;
|
std::vector<int> mValueStack;
|
||||||
int mChildrenValue;
|
int mChildrenValue;
|
||||||
int mValidUnigramCount;
|
int mValidUnigramCount;
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
#include "suggest/core/dicnode/dic_node.h"
|
#include "suggest/core/dicnode/dic_node.h"
|
||||||
|
@ -27,12 +28,17 @@
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
|
||||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||||
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
|
||||||
|
const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
|
||||||
|
const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
|
||||||
|
DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
|
||||||
|
const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60;
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
|
||||||
DicNodeVector *const childDicNodes) const {
|
DicNodeVector *const childDicNodes) const {
|
||||||
|
@ -143,14 +149,17 @@ int DynamicPatriciaTriePolicy::getTerminalNodePositionOfWord(const int *const in
|
||||||
|
|
||||||
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
const int bigramProbability) const {
|
const int bigramProbability) const {
|
||||||
// TODO: check mHeaderPolicy.usesForgettingCurve();
|
if (mHeaderPolicy.isDecayingDict()) {
|
||||||
if (unigramProbability == NOT_A_PROBABILITY) {
|
return DecayingUtils::getProbability(unigramProbability, bigramProbability);
|
||||||
return NOT_A_PROBABILITY;
|
|
||||||
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
|
||||||
return ProbabilityUtils::backoff(unigramProbability);
|
|
||||||
} else {
|
} else {
|
||||||
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
|
if (unigramProbability == NOT_A_PROBABILITY) {
|
||||||
bigramProbability);
|
return NOT_A_PROBABILITY;
|
||||||
|
} else if (bigramProbability == NOT_A_PROBABILITY) {
|
||||||
|
return ProbabilityUtils::backoff(unigramProbability);
|
||||||
|
} else {
|
||||||
|
return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
|
||||||
|
bigramProbability);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -199,11 +208,16 @@ bool DynamicPatriciaTriePolicy::addUnigramWord(const int *const word, const int
|
||||||
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (mBufferWithExtendableBuffer.getTailPosition()
|
||||||
|
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
|
AKLOGE("The dictionary is too large to dynamically update.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
|
||||||
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
|
||||||
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
readingHelper.initWithPtNodeArrayPos(getRootPosition());
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy);
|
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
bool addedNewUnigram = false;
|
bool addedNewUnigram = false;
|
||||||
if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
|
||||||
&addedNewUnigram)) {
|
&addedNewUnigram)) {
|
||||||
|
@ -222,6 +236,11 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
|
||||||
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (mBufferWithExtendableBuffer.getTailPosition()
|
||||||
|
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
|
AKLOGE("The dictionary is too large to dynamically update.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
|
@ -233,7 +252,7 @@ bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy);
|
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
bool addedNewBigram = false;
|
bool addedNewBigram = false;
|
||||||
if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
|
if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
|
||||||
if (addedNewBigram) {
|
if (addedNewBigram) {
|
||||||
|
@ -251,6 +270,11 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
|
||||||
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (mBufferWithExtendableBuffer.getTailPosition()
|
||||||
|
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
|
||||||
|
AKLOGE("The dictionary is too large to dynamically update.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (word0Pos == NOT_A_DICT_POS) {
|
if (word0Pos == NOT_A_DICT_POS) {
|
||||||
|
@ -262,7 +286,7 @@ bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy);
|
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
|
||||||
mBigramCount--;
|
mBigramCount--;
|
||||||
return true;
|
return true;
|
||||||
|
@ -277,7 +301,7 @@ void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy);
|
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -287,7 +311,7 @@ void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
|
||||||
&mBigramListPolicy, &mShortcutListPolicy);
|
&mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
|
||||||
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -296,8 +320,33 @@ bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
||||||
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// TODO: Implement more properly.
|
if (mBufferWithExtendableBuffer.isNearSizeLimit()) {
|
||||||
return mBufferWithExtendableBuffer.isNearSizeLimit();
|
// Additional buffer size is near the limit.
|
||||||
|
return true;
|
||||||
|
} else if (mHeaderPolicy.getExtendedRegionSize()
|
||||||
|
+ mBufferWithExtendableBuffer.getUsedAdditionalBufferSize()
|
||||||
|
> MAX_DICT_EXTENDED_REGION_SIZE) {
|
||||||
|
// Total extended region size exceeds the limit.
|
||||||
|
return true;
|
||||||
|
} else if (mBufferWithExtendableBuffer.getTailPosition()
|
||||||
|
>= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
|
||||||
|
&& mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) {
|
||||||
|
// Needs to reduce dictionary size.
|
||||||
|
return true;
|
||||||
|
} else if (mHeaderPolicy.isDecayingDict()) {
|
||||||
|
if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) {
|
||||||
|
// Unigram count exceeds the limit.
|
||||||
|
return true;
|
||||||
|
} else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) {
|
||||||
|
// Bigram count exceeds the limit.
|
||||||
|
return true;
|
||||||
|
} else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime()
|
||||||
|
+ MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) {
|
||||||
|
// Time to update probabilities for decaying.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
|
void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
|
||||||
|
|
|
@ -37,7 +37,8 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
|
||||||
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
|
||||||
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
mShortcutListPolicy(&mBufferWithExtendableBuffer),
|
||||||
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy),
|
mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
|
||||||
|
mHeaderPolicy.isDecayingDict()),
|
||||||
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
mUnigramCount(mHeaderPolicy.getUnigramCount()),
|
||||||
mBigramCount(mHeaderPolicy.getBigramCount()) {}
|
mBigramCount(mHeaderPolicy.getBigramCount()) {}
|
||||||
|
|
||||||
|
@ -101,6 +102,9 @@ class DynamicPatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
static const char*const UNIGRAM_COUNT_QUERY;
|
static const char*const UNIGRAM_COUNT_QUERY;
|
||||||
static const char*const BIGRAM_COUNT_QUERY;
|
static const char*const BIGRAM_COUNT_QUERY;
|
||||||
|
static const int MAX_DICT_EXTENDED_REGION_SIZE;
|
||||||
|
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
|
||||||
|
static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING;
|
||||||
|
|
||||||
const MmappedBuffer *const mBuffer;
|
const MmappedBuffer *const mBuffer;
|
||||||
const HeaderPolicy mHeaderPolicy;
|
const HeaderPolicy mHeaderPolicy;
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
#include "suggest/policyimpl/dictionary/header/header_policy.h"
|
||||||
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||||
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
|
||||||
#include "utils/hash_map_compat.h"
|
#include "utils/hash_map_compat.h"
|
||||||
|
|
||||||
|
@ -57,7 +58,9 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
wordCodePoints[matchedCodePointCount + j])) {
|
wordCodePoints[matchedCodePointCount + j])) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return reallocatePtNodeAndAddNewPtNodes(nodeReader,
|
return reallocatePtNodeAndAddNewPtNodes(nodeReader,
|
||||||
readingHelper->getMergedNodeCodePoints(), j, probability,
|
readingHelper->getMergedNodeCodePoints(), j,
|
||||||
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
|
||||||
|
probability),
|
||||||
wordCodePoints + matchedCodePointCount,
|
wordCodePoints + matchedCodePointCount,
|
||||||
codePointCount - matchedCodePointCount);
|
codePointCount - matchedCodePointCount);
|
||||||
}
|
}
|
||||||
|
@ -69,7 +72,8 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
}
|
}
|
||||||
if (!nodeReader->hasChildren()) {
|
if (!nodeReader->hasChildren()) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, probability,
|
return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
|
||||||
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
||||||
wordCodePoints + readingHelper->getTotalCodePointCount(),
|
wordCodePoints + readingHelper->getTotalCodePointCount(),
|
||||||
codePointCount - readingHelper->getTotalCodePointCount());
|
codePointCount - readingHelper->getTotalCodePointCount());
|
||||||
}
|
}
|
||||||
|
@ -86,7 +90,7 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
|
||||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
||||||
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
||||||
probability, &pos);
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
|
||||||
|
@ -351,9 +355,11 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
if (originalPtNode->isTerminal()) {
|
if (originalPtNode->isTerminal()) {
|
||||||
// Overwrites the probability.
|
// Overwrites the probability.
|
||||||
*outAddedNewUnigram = false;
|
*outAddedNewUnigram = false;
|
||||||
|
const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
|
||||||
|
probability);
|
||||||
int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
|
int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
|
||||||
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
|
||||||
probability, &probabilityFieldPos)) {
|
probabilityToWrite, &probabilityFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -365,7 +371,8 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
|
||||||
}
|
}
|
||||||
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
|
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
|
||||||
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
|
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
|
||||||
probability, &movedPos)) {
|
getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
|
||||||
|
&movedPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -481,11 +488,15 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
DynamicPatriciaTrieGcEventListeners
|
DynamicPatriciaTrieGcEventListeners
|
||||||
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
|
||||||
this, mBuffer);
|
this, mBuffer, mIsDecayingDict);
|
||||||
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
|
||||||
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
|
||||||
|
.getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
|
||||||
|
// TODO: Remove more unigrams.
|
||||||
|
}
|
||||||
|
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
|
||||||
|
@ -495,6 +506,11 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
|
||||||
|
> DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
|
||||||
|
// TODO: Remove more bigrams.
|
||||||
|
}
|
||||||
|
|
||||||
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
// Mapping from positions in mBuffer to positions in bufferToWrite.
|
||||||
DictPositionRelocationMap dictPositionRelocationMap;
|
DictPositionRelocationMap dictPositionRelocationMap;
|
||||||
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
|
||||||
|
@ -508,7 +524,8 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
|
|
||||||
// Create policy instance for the GCed dictionary.
|
// Create policy instance for the GCed dictionary.
|
||||||
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
|
||||||
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy);
|
DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
|
||||||
|
mIsDecayingDict);
|
||||||
// Create reading helper for the GCed dictionary.
|
// Create reading helper for the GCed dictionary.
|
||||||
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
|
||||||
&newDictShortcutPolicy);
|
&newDictShortcutPolicy);
|
||||||
|
@ -525,4 +542,13 @@ bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
|
||||||
|
const int newProbability) {
|
||||||
|
if (mIsDecayingDict) {
|
||||||
|
return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability);
|
||||||
|
} else {
|
||||||
|
return newProbability;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -47,10 +47,13 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
|
DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const size_t MAX_DICTIONARY_SIZE;
|
||||||
|
|
||||||
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
|
||||||
DynamicBigramListPolicy *const bigramPolicy,
|
DynamicBigramListPolicy *const bigramPolicy,
|
||||||
DynamicShortcutListPolicy *const shortcutPolicy)
|
DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict)
|
||||||
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
|
: mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
|
||||||
|
mIsDecayingDict(isDecayingDict) {}
|
||||||
|
|
||||||
~DynamicPatriciaTrieWritingHelper() {}
|
~DynamicPatriciaTrieWritingHelper() {}
|
||||||
|
|
||||||
|
@ -87,11 +90,11 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
|
||||||
|
|
||||||
static const int CHILDREN_POSITION_FIELD_SIZE;
|
static const int CHILDREN_POSITION_FIELD_SIZE;
|
||||||
static const size_t MAX_DICTIONARY_SIZE;
|
|
||||||
|
|
||||||
BufferWithExtendableBuffer *const mBuffer;
|
BufferWithExtendableBuffer *const mBuffer;
|
||||||
DynamicBigramListPolicy *const mBigramPolicy;
|
DynamicBigramListPolicy *const mBigramPolicy;
|
||||||
DynamicShortcutListPolicy *const mShortcutPolicy;
|
DynamicShortcutListPolicy *const mShortcutPolicy;
|
||||||
|
const bool mIsDecayingDict;
|
||||||
|
|
||||||
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
|
||||||
const int movedPos, const int bigramLinkedNodePos);
|
const int movedPos, const int bigramLinkedNodePos);
|
||||||
|
@ -127,6 +130,8 @@ class DynamicPatriciaTrieWritingHelper {
|
||||||
|
|
||||||
bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite,
|
bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite,
|
||||||
int *const outUnigramCount, int *const outBigramCount);
|
int *const outUnigramCount, int *const outBigramCount);
|
||||||
|
|
||||||
|
int getUpdatedProbability(const int originalProbability, const int newProbability);
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
|
||||||
|
|
|
@ -0,0 +1,129 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
|
||||||
|
|
||||||
|
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000;
|
||||||
|
const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
|
||||||
|
const int DecayingUtils::MAX_BIGRAM_COUNT = 12000;
|
||||||
|
const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
|
||||||
|
|
||||||
|
const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127;
|
||||||
|
const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120;
|
||||||
|
const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
|
||||||
|
const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8;
|
||||||
|
const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
|
||||||
|
const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
|
||||||
|
const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability,
|
||||||
|
const int encodedBigramProbabilityDelta) {
|
||||||
|
if (encodedUnigramProbability == NOT_A_PROBABILITY) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
} else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
|
||||||
|
const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
|
||||||
|
encodedUnigramProbability));
|
||||||
|
return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
|
||||||
|
} else {
|
||||||
|
const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
|
||||||
|
decodeUnigramProbability(encodedUnigramProbability),
|
||||||
|
decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
|
||||||
|
return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability,
|
||||||
|
const int newProbability) {
|
||||||
|
if (originalEncodedProbability == NOT_A_PROBABILITY) {
|
||||||
|
// The unigram is not in this dictionary.
|
||||||
|
if (newProbability == NOT_A_PROBABILITY) {
|
||||||
|
// The unigram is not in other dictionaries.
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return MIN_VALID_UNIGRAM_PROBABILITY;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (newProbability != NOT_A_PROBABILITY
|
||||||
|
&& originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
|
||||||
|
return MIN_VALID_UNIGRAM_PROBABILITY;
|
||||||
|
}
|
||||||
|
return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) {
|
||||||
|
return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) {
|
||||||
|
return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta(
|
||||||
|
const int originalEncodedProbabilityDelta, const int newProbability) {
|
||||||
|
if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
|
||||||
|
// The bigram relation is not in this dictionary.
|
||||||
|
if (newProbability == NOT_A_PROBABILITY) {
|
||||||
|
// The bigram target is not in other dictionaries.
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (newProbability != NOT_A_PROBABILITY
|
||||||
|
&& originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
|
||||||
|
return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
|
||||||
|
}
|
||||||
|
return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP,
|
||||||
|
MAX_BIGRAM_PROBABILITY_DELTA);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) {
|
||||||
|
return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
|
||||||
|
return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) {
|
||||||
|
const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
|
||||||
|
if (probability < 0) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
} else {
|
||||||
|
return min(probability, MAX_UNIGRAM_PROBABILITY);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) {
|
||||||
|
const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
|
||||||
|
if (probabilityDelta < 0) {
|
||||||
|
return NOT_A_PROBABILITY;
|
||||||
|
} else {
|
||||||
|
return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) {
|
||||||
|
return rawProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace latinime
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013, The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef LATINIME_DECAYING_UTILS_H
|
||||||
|
#define LATINIME_DECAYING_UTILS_H
|
||||||
|
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
namespace latinime {
|
||||||
|
|
||||||
|
// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
|
||||||
|
// required to introduced to each terminal PtNode and bigram entry.
|
||||||
|
// TODO: Quit using bigram probability to indicate the delta.
|
||||||
|
// TODO: Quit using bigram probability delta.
|
||||||
|
class DecayingUtils {
|
||||||
|
public:
|
||||||
|
static const int MAX_UNIGRAM_COUNT;
|
||||||
|
static const int MAX_UNIGRAM_COUNT_AFTER_GC;
|
||||||
|
static const int MAX_BIGRAM_COUNT;
|
||||||
|
static const int MAX_BIGRAM_COUNT_AFTER_GC;
|
||||||
|
|
||||||
|
static int getProbability(const int encodedUnigramProbability,
|
||||||
|
const int encodedBigramProbabilityDelta);
|
||||||
|
|
||||||
|
static int getUpdatedUnigramProbability(const int originalEncodedProbability,
|
||||||
|
const int newProbability);
|
||||||
|
|
||||||
|
static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta,
|
||||||
|
const int newProbability);
|
||||||
|
|
||||||
|
static int isValidUnigram(const int encodedUnigramProbability);
|
||||||
|
|
||||||
|
static int isValidBigram(const int encodedProbabilityDelta);
|
||||||
|
|
||||||
|
static int getUnigramProbabilityToSave(const int encodedProbability);
|
||||||
|
|
||||||
|
static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils);
|
||||||
|
|
||||||
|
static const int MAX_COMPUTED_PROBABILITY;
|
||||||
|
static const int MAX_UNIGRAM_PROBABILITY;
|
||||||
|
static const int MIN_VALID_UNIGRAM_PROBABILITY;
|
||||||
|
static const int UNIGRAM_PROBABILITY_STEP;
|
||||||
|
static const int MAX_BIGRAM_PROBABILITY_DELTA;
|
||||||
|
static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA;
|
||||||
|
static const int BIGRAM_PROBABILITY_DELTA_STEP;
|
||||||
|
|
||||||
|
static int decodeUnigramProbability(const int encodedProbability);
|
||||||
|
|
||||||
|
static int decodeBigramProbabilityDelta(const int encodedProbability);
|
||||||
|
|
||||||
|
static int getDecayedProbability(const int rawProbability);
|
||||||
|
};
|
||||||
|
} // namespace latinime
|
||||||
|
#endif /* LATINIME_DECAYING_UTILS_H */
|
|
@ -0,0 +1,171 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin;
|
||||||
|
|
||||||
|
import android.test.AndroidTestCase;
|
||||||
|
import android.test.suitebuilder.annotation.LargeTest;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@LargeTest
|
||||||
|
public class BinaryDictionaryDecayingTests extends AndroidTestCase {
|
||||||
|
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
|
||||||
|
private static final String TEST_LOCALE = "test";
|
||||||
|
|
||||||
|
private static final int DUMMY_PROBABILITY = 0;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void tearDown() throws Exception {
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
|
||||||
|
binaryDictionary.flushWithGC();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
|
||||||
|
// Currently, probabilities are decayed when GC is run. All entries that have never been
|
||||||
|
// typed in 32 GCs are removed.
|
||||||
|
final int count = 32;
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
binaryDictionary.flushWithGC();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
|
||||||
|
final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
|
||||||
|
getContext().getCacheDir());
|
||||||
|
Map<String, String> attributeMap = new HashMap<String, String>();
|
||||||
|
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
|
||||||
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
|
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
|
||||||
|
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
|
||||||
|
if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
|
||||||
|
3 /* dictVersion */, attributeMap)) {
|
||||||
|
return file;
|
||||||
|
} else {
|
||||||
|
throw new IOException("Empty dictionary cannot be created.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testAddValidAndInvalidWords() {
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidWord("a"));
|
||||||
|
binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidWord("a"));
|
||||||
|
binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidWord("a"));
|
||||||
|
binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidWord("a"));
|
||||||
|
|
||||||
|
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidWord("b"));
|
||||||
|
|
||||||
|
final int unigramProbability = binaryDictionary.getFrequency("a");
|
||||||
|
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
|
||||||
|
binaryDictionary.addUnigramWord("c", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addBigramWords("a", "c", DUMMY_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("a", "c"));
|
||||||
|
|
||||||
|
binaryDictionary.close();
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: Add large tests.
|
||||||
|
public void testDecayingProbability() {
|
||||||
|
File dictFile = null;
|
||||||
|
try {
|
||||||
|
dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
|
||||||
|
} catch (IOException e) {
|
||||||
|
fail("IOException while writing an initial dictionary : " + e);
|
||||||
|
}
|
||||||
|
BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
|
||||||
|
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
|
||||||
|
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
|
||||||
|
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidWord("a"));
|
||||||
|
forcePassingShortTime(binaryDictionary);
|
||||||
|
assertFalse(binaryDictionary.isValidWord("a"));
|
||||||
|
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
forcePassingShortTime(binaryDictionary);
|
||||||
|
assertTrue(binaryDictionary.isValidWord("a"));
|
||||||
|
forcePassingLongTime(binaryDictionary);
|
||||||
|
assertFalse(binaryDictionary.isValidWord("a"));
|
||||||
|
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
forcePassingShortTime(binaryDictionary);
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
|
||||||
|
binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
forcePassingShortTime(binaryDictionary);
|
||||||
|
assertTrue(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
forcePassingLongTime(binaryDictionary);
|
||||||
|
assertFalse(binaryDictionary.isValidBigram("a", "b"));
|
||||||
|
|
||||||
|
binaryDictionary.close();
|
||||||
|
dictFile.delete();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue