Merge "Implement updateCounter() by using existing entry adding methods."

This commit is contained in:
Keisuke Kuroyanagi 2014-10-02 02:09:20 +00:00 committed by Android (Google) Code Review
commit 0afad267c5
13 changed files with 129 additions and 21 deletions

View file

@ -374,7 +374,7 @@ static bool latinime_BinaryDictionary_addUnigramEntry(JNIEnv *env, jclass clazz,
// Use 1 for count to indicate the word has been input.
const UnigramProperty unigramProperty(isBeginningOfSentence, isNotAWord,
isBlacklisted, probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */),
&shortcuts);
std::move(shortcuts));
return dictionary->addUnigramEntry(CodePointArrayView(codePoints, codePointCount),
&unigramProperty);
}
@ -434,10 +434,16 @@ static bool latinime_BinaryDictionary_updateCounter(JNIEnv *env, jclass clazz, j
if (!dictionary) {
return false;
}
jsize wordLength = env->GetArrayLength(word);
int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
return false;
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
prevWordCodePointArrays, isBeginningOfSentenceArray,
env->GetArrayLength(prevWordCodePointArrays));
jsize codePointCount = env->GetArrayLength(word);
int wordCodePoints[codePointCount];
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
return dictionary->updateCounter(&prevWordsInfo,
CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
historicalInfo);
}
// Returns how many language model params are processed.
@ -509,7 +515,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
// Use 1 for count to indicate the word has been input.
const UnigramProperty unigramProperty(false /* isBeginningOfSentence */, isNotAWord,
isBlacklisted, unigramProbability,
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), &shortcuts);
HistoricalInfo(timestamp, 0 /* level */, 1 /* count */), std::move(shortcuts));
dictionary->addUnigramEntry(CodePointArrayView(word1CodePoints, word1Length),
&unigramProperty);
if (word0) {

View file

@ -155,6 +155,14 @@ bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
}
// Forwards a counter update for |codePoints| (with the context in |prevWordsInfo|) to the
// dictionary structure policy and returns the result the policy reports.
bool Dictionary::updateCounter(const PrevWordsInfo *const prevWordsInfo,
        const CodePointArrayView codePoints, const bool isValidWord,
        const HistoricalInfo historicalInfo) {
    // The current time is refreshed before the policy is invoked.
    TimeKeeper::setCurrentTime();
    const bool isUpdated = mDictionaryStructureWithBufferPolicy->updateCounter(
            prevWordsInfo, codePoints, isValidWord, historicalInfo);
    return isUpdated;
}
bool Dictionary::flush(const char *const filePath) {
TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->flush(filePath);

View file

@ -22,6 +22,7 @@
#include "defines.h"
#include "jni.h"
#include "suggest/core/dictionary/ngram_listener.h"
#include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/policy/dictionary_header_structure_policy.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
@ -90,6 +91,10 @@ class Dictionary {
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView codePoints);
// Updates the counter for |codePoints| in the context of |prevWordsInfo|. Returns the
// success flag reported by the dictionary structure policy.
bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView codePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
bool flush(const char *const filePath);
bool flushWithGC(const char *const filePath);

View file

@ -27,7 +27,7 @@ namespace latinime {
class NgramProperty {
public:
NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
const HistoricalInfo &historicalInfo)
const HistoricalInfo historicalInfo)
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
mHistoricalInfo(historicalInfo) {}

View file

@ -54,11 +54,18 @@ class UnigramProperty {
mProbability(NOT_A_PROBABILITY), mHistoricalInfo(), mShortcuts() {}
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const int probability, const HistoricalInfo &historicalInfo,
const std::vector<ShortcutProperty> *const shortcuts)
const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo,
const std::vector<ShortcutProperty> &&shortcuts)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts(*shortcuts) {}
mHistoricalInfo(historicalInfo), mShortcuts(std::move(shortcuts)) {}
// Constructor for a unigram without shortcuts: every attribute is taken from the arguments
// and mShortcuts is left default-constructed.
UnigramProperty(const bool representsBeginningOfSentence, const bool isNotAWord,
const bool isBlacklisted, const int probability, const HistoricalInfo historicalInfo)
: mRepresentsBeginningOfSentence(representsBeginningOfSentence),
mIsNotAWord(isNotAWord), mIsBlacklisted(isBlacklisted), mProbability(probability),
mHistoricalInfo(historicalInfo), mShortcuts() {}
bool representsBeginningOfSentence() const {
return mRepresentsBeginningOfSentence;

View file

@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_shortcut_iterator.h"
#include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/core/dictionary/property/word_property.h"
#include "suggest/core/dictionary/word_attributes.h"
#include "utils/int_array_view.h"
@ -87,6 +88,11 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints) = 0;
// Returns whether the update was successful or not.
virtual bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) = 0;
// Returns whether the flush was successful or not.
virtual bool flush(const char *const filePath) = 0;

View file

@ -33,7 +33,7 @@ class PrevWordsInfo {
clear();
}
PrevWordsInfo(PrevWordsInfo &&prevWordsInfo)
PrevWordsInfo(const PrevWordsInfo &prevWordsInfo)
: mPrevWordCount(prevWordsInfo.mPrevWordCount) {
for (size_t i = 0; i < mPrevWordCount; ++i) {
mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
@ -73,6 +73,16 @@ class PrevWordsInfo {
mIsBeginningOfSentence[0] = isBeginningOfSentence;
}
// Returns the number of previous words stored in this object (mPrevWordCount).
size_t getPrevWordCount() const {
return mPrevWordCount;
}
// TODO: Remove.
// Constructs a PrevWordsInfo from this object's previous-word data, with the previous word
// count capped at |maxPrevWordCount|.
const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const {
    const size_t trimmedPrevWordCount = std::min(mPrevWordCount, maxPrevWordCount);
    return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
            trimmedPrevWordCount);
}
bool isValid() const {
if (mPrevWordCodePointCount[0] > 0) {
return true;
@ -112,7 +122,7 @@ class PrevWordsInfo {
}
private:
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo);
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount,

View file

@ -52,6 +52,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@ -339,11 +340,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
}
if (prevWordIds[0] == NOT_A_WORD_ID) {
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
HistoricalInfo(), &shortcuts);
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@ -414,6 +413,29 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
// Updates the counter of |wordCodePoints| by re-adding it through the existing entry adding
// methods: first as a unigram, then as an n-gram following |prevWordsInfo|.
// Valid words are stored with DUMMY_PROBABILITY_FOR_VALID_WORDS, other words with
// NOT_A_PROBABILITY. Returns false when the dictionary is not updatable or when either entry
// cannot be added; returns true otherwise.
bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
        const CodePointArrayView wordCodePoints, const bool isValidWord,
        const HistoricalInfo historicalInfo) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
        return false;
    }
    const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
            false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
    if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
        AKLOGE("Cannot update unigram entry in updateCounter().");
        return false;
    }
    const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
    if (!addNgramEntry(prevWordsInfo, &ngramProperty)) {
        // Log the n-gram failure distinctly so it is not mistaken for the unigram failure above.
        AKLOGE("Cannot update ngram entry in updateCounter().");
        return false;
    }
    return true;
}
bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
@ -551,7 +573,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
*historicalInfo, &shortcuts);
*historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}

View file

@ -118,6 +118,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints);
// Returns whether the update was successful or not.
bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
bool flush(const char *const filePath);
bool flushWithGC(const char *const filePath);
@ -147,6 +151,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
// prevent the dictionary from overflowing.
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy *const mHeaderPolicy;

View file

@ -477,7 +477,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(ptNodeParams.representsBeginningOfSentence(),
ptNodeParams.isNotAWord(), ptNodeParams.isBlacklisted(), ptNodeParams.getProbability(),
HistoricalInfo(), &shortcuts);
HistoricalInfo(), std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}

View file

@ -107,6 +107,14 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false;
}
// Counter updates are not performed by this policy: the call only logs a warning and
// always returns false. All arguments are ignored.
bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
return false;
}
bool flush(const char *const filePath) {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: flush() is called for non-updatable dictionary.");

View file

@ -43,6 +43,7 @@ const char *const Ver4PatriciaTriePolicy::MAX_BIGRAM_COUNT_QUERY = "MAX_BIGRAM_C
const int Ver4PatriciaTriePolicy::MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS = 1024;
const int Ver4PatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
Ver4DictConstants::MAX_DICTIONARY_SIZE - MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
const int Ver4PatriciaTriePolicy::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
@ -298,11 +299,9 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
return false;
}
const std::vector<UnigramProperty::ShortcutProperty> shortcuts;
const UnigramProperty beginningOfSentenceUnigramProperty(
true /* representsBeginningOfSentence */, true /* isNotAWord */,
false /* isBlacklisted */, MAX_PROBABILITY /* probability */,
HistoricalInfo(), &shortcuts);
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
&beginningOfSentenceUnigramProperty)) {
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
@ -364,6 +363,32 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
}
}
// Updates the counter of |wordCodePoints| by re-adding it through the existing entry adding
// methods: first as a unigram, then as an n-gram for every context length from 1 up to
// prevWordsInfo->getPrevWordCount(), each using a trimmed copy of |prevWordsInfo|.
// Valid words are stored with DUMMY_PROBABILITY_FOR_VALID_WORDS, other words with
// NOT_A_PROBABILITY. Returns false when the dictionary is not updatable or as soon as any
// entry cannot be added; returns true otherwise.
bool Ver4PatriciaTriePolicy::updateCounter(const PrevWordsInfo *const prevWordsInfo,
        const CodePointArrayView wordCodePoints, const bool isValidWord,
        const HistoricalInfo historicalInfo) {
    if (!mBuffers->isUpdatable()) {
        AKLOGI("Warning: updateCounter() is called for non-updatable dictionary.");
        return false;
    }
    // TODO: Have count up method in language model dict content.
    const int probability = isValidWord ? DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY;
    const UnigramProperty unigramProperty(false /* representsBeginningOfSentence */,
            false /* isNotAWord */, false /*isBlacklisted*/, probability, historicalInfo);
    if (!addUnigramEntry(wordCodePoints, &unigramProperty)) {
        AKLOGE("Cannot update unigram entry in updateCounter().");
        return false;
    }
    const NgramProperty ngramProperty(wordCodePoints.toVector(), probability, historicalInfo);
    for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
        // Add the same target word once per context length i.
        const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
        if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
            AKLOGE("Cannot update ngram entry in updateCounter().");
            return false;
        }
    }
    return true;
}
bool Ver4PatriciaTriePolicy::flush(const char *const filePath) {
if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: flush() is called for non-updatable dictionary. filePath: %s", filePath);
@ -486,7 +511,7 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
}
const UnigramProperty unigramProperty(probabilityEntry.representsBeginningOfSentence(),
probabilityEntry.isNotAWord(), probabilityEntry.isBlacklisted(),
probabilityEntry.getProbability(), *historicalInfo, &shortcuts);
probabilityEntry.getProbability(), *historicalInfo, std::move(shortcuts));
return WordProperty(wordCodePoints.toVector(), &unigramProperty, &ngrams);
}

View file

@ -98,6 +98,10 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints);
// Returns whether the update was successful or not.
bool updateCounter(const PrevWordsInfo *const prevWordsInfo,
const CodePointArrayView wordCodePoints, const bool isValidWord,
const HistoricalInfo historicalInfo);
bool flush(const char *const filePath);
bool flushWithGC(const char *const filePath);
@ -127,6 +131,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
// prevent the dictionary from overflowing.
static const int MARGIN_TO_REFUSE_DYNAMIC_OPERATIONS;
static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
// TODO: Remove
static const int DUMMY_PROBABILITY_FOR_VALID_WORDS;
const Ver4DictBuffers::Ver4DictBuffersPtr mBuffers;
const HeaderPolicy *const mHeaderPolicy;