am 0afad267: Merge "Implement updateCounter() by using existing entry adding methods."

* commit '0afad267c5a8ef1fbac0fa37b215638830b29604': Implement updateCounter() by using existing entry adding methods.

commit 131306aafa

@@ -374,7 +374,7 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
     // Use 1 for count to indicate the word has inputted.
     const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
             isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
-            &shortcuts);
+            std::move(shortcuts));
     return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
             &unigramProperty);
 }

@@ -434,10 +434,16 @@ static bool latinime_BinaryDictionary_updateCounter(JNIEnv *env, jclass clazz, j
     if (!dictionary) {
         return false;
     }
-    jsize wordLength = env->GetArrayLength(word);
-    int wordCodePoints[wordLength];
-    env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
-    return false;
+    const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
+            prevWordCodePointArrays, isBeginningOfSentenceArray,
+            env->GetArrayLength(prevWordCodePointArrays));
+    jsize codePointCount = env->GetArrayLength(word);
+    int wordCodePoints[codePointCount];
+    env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
+    const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
+    return dictionary->updateCounter(&prevWordsInfo,
+            CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
+            historicalInfo);
 }

 // Returns how many language model params are processed.

@@ -509,7 +515,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
         // Use 1 for count to indicate the word has inputted.
         const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
                 isBlacklisted, unigramProbability,
-                HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), &shortcuts);
+                HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
         dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
                 &unigramProperty);
         if (word0) {

@@ -155,6 +155,14 @@ bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
     return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
 }

+bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+        const CodePointArrayView codePoints, const bool isValidWord,
+        const HistoricalInfo historicalInfo) {
+    TimeKeeper::setCurrentTime();
+    return mDictionaryStructureWithBufferPolicy->updateCounter(prevWordsInfo, codePoints,
+            isValidWord, historicalInfo);
+}
+
 bool Dictionary::flush(const char *const filePath) {
     TimeKeeper::setCurrentTime();
     return mDictionaryStructureWithBufferPolicy->flush(filePath);

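For orientation, the JNI handler above ends up calling this new Dictionary method, which simply forwards to the structure policy. Below is a minimal caller-side sketch, not part of the commit: the helper name countUpTypedWord is invented, and it assumes dict points to an already opened, updatable dictionary while prevWordsInfo describes the words typed before the current one.

    // Illustrative sketch only: record one more input of a typed word.
    static bool countUpTypedWord(Dictionary *const dict, const PrevWordsInfo &prevWordsInfo,
            const int *const codePoints, const int codePointCount, const int timestamp) {
        // A count of 1 records a single additional input; the level field is unused here.
        const HistoricalInfo historicalInfo(timestamp, 0 /* level */, 1 /* count */);
        return dict->updateCounter(&prevWordsInfo,
                CodePointArrayView(codePoints, codePointCount), true /* isValidWord */,
                historicalInfo);
    }
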
@@ -22,6 +22,7 @@
 #include "defines.h"
 #include "jni.h"
 #include "suggest/core/dictionary/ngram_listener.h"
+#include "suggest/core/dictionary/property/historical_info.h"
 #include "suggest/core/dictionary/property/word_property.h"
 #include "suggest/core/policy/dictionary_header_structure_policy.h"
 #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"

@@ -90,6 +91,10 @@ class Dictionary {
     bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
             const CodePointArrayView codePoints);

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView codePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo);
+
     bool flush(const char *const filePath);

     bool flushWithGC(const char *const filePath);

@@ -27,7 +27,7 @@ namespace latinime {
 class NgramProperty {
  public:
     NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
-            const HistoricalInfo &historicalInfo)
+            const HistoricalInfo historicalInfo)
             : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
               mHistoricalInfo(historicalInfo) {}

@@ -54,11 +54,18 @@ class UnigramProperty {
             mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}

     UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
-            const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo,
-            const std::vector<ShortcutProperty> *const shortcuts)
+            const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo,
+            const std::vector<ShortcutProperty> &&shortcuts)
             : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
               mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
-              mHistoricalInfo(historicalInfo), mShortcuts(*shortcuts) {}
+              mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
+
+    // Without shortcuts.
+    UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
+            const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo)
+            : mRepresentsBeginningOfSentence(representsBeginningOfSentence),
+              mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
+              mHistoricalInfo(historicalInfo), mShortcuts() {}

     bool representsBeginningOfSentence() const {
         return mRepresentsBeginningOfSentence;

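A note on the constructor change above: UnigramProperty now takes its shortcut list by rvalue reference, and the new overload drops the parameter entirely, so call sites no longer build an empty vector just to pass its address (the two deletions in the Ver4PatriciaTriePolicy hunks further down). A minimal illustration, with probability and timestamp standing in for values computed by the caller:

    // With shortcuts: hand the vector over rather than passing a pointer to it.
    std::vector<UnigramProperty::ShortcutProperty> shortcuts;  // filled elsewhere
    const UnigramProperty withShortcuts(false /* isBeginningOfSentence */, false /* isNotAWord */,
            false /* isBlacklisted */, probability,
            HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));

    // Without shortcuts: the new overload replaces the old empty-vector-and-pointer pattern.
    const UnigramProperty withoutShortcuts(false /* isBeginningOfSentence */, false /* isNotAWord */,
            false /* isBlacklisted */, probability,
            HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
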
@@ -21,6 +21,7 @@

 #include "defines.h"
 #include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
+#include "suggest/core/dictionary/property/historical_info.h"
 #include "suggest/core/dictionary/property/word_property.h"
 #include "suggest/core/dictionary/word_attributes.h"
 #include "utils/int_array_view.h"

@@ -87,6 +88,11 @@ class DictionaryStructureWithBufferPolicy {
     virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
             const CodePointArrayView wordCodePoints) = 0;

+    // Returns whether the update was success or not.
+    virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo) = 0;
+
     // Returns whether the flush was success or not.
     virtual bool flush(const char *const filePath) = 0;

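The pure virtual added above is the whole contract a format has to meet. Updatable formats satisfy it by reusing the existing entry-adding methods, as the Ver4PatriciaTriePolicy implementations further down show; a format that cannot be written simply declines. A sketch of the read-only case, with SomeReadOnlyPolicy standing in as an invented subclass name (the real one in this change is PatriciaTriePolicy):

    bool SomeReadOnlyPolicy::updateCounter(const PrevWordsInfo *const /* prevWordsInfo */,
            const CodePointArrayView /* wordCodePoints */, const bool /* isValidWord */,
            const HistoricalInfo /* historicalInfo */) {
        // Counting requires rewriting entries, which a read-only format cannot do.
        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
        return false;
    }
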
@@ -33,7 +33,7 @@ class PrevWordsInfo {
         clear();
     }

-    PrevWordsInfo(PrevWordsInfo &&prevWordsInfo)
+    PrevWordsInfo(const PrevWordsInfo &prevWordsInfo)
             : mPrevWordCount(prevWordsInfo.mPrevWordCount) {
         for (size_t i = 0; i < mPrevWordCount; ++i) {
             mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];

@@ -73,6 +73,16 @@ class PrevWordsInfo {
         mIsBeginningOfSentence[0] = isBeginningOfSentence;
     }

+    size_t getPrevWordCount() const {
+        return mPrevWordCount;
+    }
+
+    // TODO: Remove.
+    const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const {
+        return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
+                std::min(mPrevWordCount, maxPrevWordCount));
+    }
+
     bool isValid() const {
         if (mPrevWordCodePointCount[0] > 0) {
             return true;

@@ -112,7 +122,7 @@ class PrevWordsInfo {
     }

  private:
-    DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
+    DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo);

     static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
             const int *const wordCodePoints, const int wordCodePointCount,

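The two accessors added above, together with the relaxed copy restrictions (copy construction is now allowed; only assignment stays disallowed), are what let a caller re-register an n-gram for every context length that is actually available. A sketch of that pattern, mirroring the loop used by the second Ver4PatriciaTriePolicy near the end of this change; prevWordsInfo (a pointer) and ngramProperty are assumed to already exist in the enclosing policy method:

    // Add the same n-gram target under its 1-word, 2-word, ... contexts.
    for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
        // getTrimmedPrevWordsInfo() returns by value, which relies on the new copy constructor.
        const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
        if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
            return false;  // stop at the first order that fails
        }
    }
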
@@ -52,6 +52,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
         Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;

 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
         DicNodeVector *const childDicNodes) const {

@@ -339,11 +340,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
     }
     if (prevWordIds[0] == NOT_A_WORD_ID) {
         if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
-            const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
             const UnigramProperty beginningOfSentenceUnigramProperty(
                     true /* representsBeginningOfSentence */, true /* isNotAWord */,
-                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
-                    HistoricalInfo(), &shortcuts);
+                    false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
             if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
                     &beginningOfSentenceUnigramProperty)) {
                 AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");

|
@ -414,6 +413,29 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
|
||||||
|
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
||||||
|
const HistoricalInfo historicalInfo) {
|
||||||
|
if (!mBuffers->isUpdatable()) {
|
||||||
|
AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
|
||||||
|
const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
|
||||||
|
false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
|
||||||
|
if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
|
||||||
|
AKLOGE("Cannot update unigarm entry in updateCounter().");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
|
||||||
|
if (!addNgramEntry(prevWordsInfo, &ngramProperty)) {
|
||||||
|
AKLOGE("Cannot update unigarm entry in updateCounter().");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
|
||||||
|
@@ -551,7 +573,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
     }
     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
             ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
-            *historicalInfo, &shortcuts);
+            *historicalInfo, std::move(shortcuts));
     return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

@@ -118,6 +118,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
             const CodePointArrayView wordCodePoints);

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo);
+
     bool flush(const char *const filePath);

     bool flushWithGC(const char *const filePath);

@@ -147,6 +151,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     // prevent the dictionary from overflowing.
     static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
     static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+    static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;

     const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
     const HeaderPolicy *const mHeaderPolicy;

@@ -477,7 +477,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
     }
     const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
             ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
-            HistoricalInfo(), &shortcuts);
+            HistoricalInfo(), std::move(shortcuts));
     return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

@@ -107,6 +107,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
         return false;
     }

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo) {
+        // This method should not be called for non-updatable dictionary.
+        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+        return false;
+    }
+
     bool flush(const char *const filePath) {
         // This method should not be called for non-updatable dictionary.
         AKLOGI("Warning: flush() is called for non-updatable dictionary.");

@@ -43,6 +43,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
 const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
 const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
         Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
+const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;

 void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
         DicNodeVector *const childDicNodes) const {

@@ -298,11 +299,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
         if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
             return false;
         }
-        const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
         const UnigramProperty beginningOfSentenceUnigramProperty(
                 true /* representsBeginningOfSentence */, true /* isNotAWord */,
-                false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
-                HistoricalInfo(), &shortcuts);
+                false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
         if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
                 &beginningOfSentenceUnigramProperty)) {
             AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");

@@ -364,6 +363,32 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
     }
 }

+bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
+        const CodePointArrayView wordCodePoints, const bool isValidWord,
+        const HistoricalInfo historicalInfo) {
+    if (!mBuffers->isUpdatable()) {
+        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
+        return false;
+    }
+    // TODO: Have count up method in language model dict content.
+    const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
+    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
+            false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
+    if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
+        AKLOGE("Cannot update unigarm entry in updateCounter().");
+        return false;
+    }
+    const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
+    for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
+        const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
+        if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
+            AKLOGE("Cannot update ngram entry in updateCounter().");
+            return false;
+        }
+    }
+    return true;
+}
+
 bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
     if (!mBuffers->isUpdatable()) {
         AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);

@@ -486,7 +511,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
     }
     const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
             probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
-            probabilityEntry.getProbability(), *historicalInfo, &shortcuts);
+            probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts));
     return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
 }

@@ -98,6 +98,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
             const CodePointArrayView wordCodePoints);

+    bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
+            const CodePointArrayView wordCodePoints, const bool isValidWord,
+            const HistoricalInfo historicalInfo);
+
     bool flush(const char *const filePath);

     bool flushWithGC(const char *const filePath);

@@ -127,6 +131,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
     // prevent the dictionary from overflowing.
     static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
     static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+    // TODO: Remove
+    static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;

     const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
     const HeaderPolicy *const mHeaderPolicy;