Merge changes I210acb81,Ie9508788

* changes:
  Make NgramProperty have NgramContext.
  Create .cpp file for NgramContext.
This commit is contained in:
Keisuke Kuroyanagi 2014-10-21 10:28:25 +00:00 committed by Android (Google) Code Review
commit dfc82fa366
15 changed files with 188 additions and 150 deletions

View file

@ -40,6 +40,7 @@ LATIN_IME_CORE_SRC_FILES := \
proximity_info_state_utils.cpp) \ proximity_info_state_utils.cpp) \
suggest/core/policy/weighting.cpp \ suggest/core/policy/weighting.cpp \
suggest/core/session/dic_traverse_session.cpp \ suggest/core/session/dic_traverse_session.cpp \
suggest/core/session/ngram_context.cpp \
$(addprefix suggest/core/result/, \ $(addprefix suggest/core/result/, \
suggestion_results.cpp \ suggestion_results.cpp \
suggestions_output_utils.cpp) \ suggestions_output_utils.cpp) \
@ -55,7 +56,7 @@ LATIN_IME_CORE_SRC_FILES := \
dynamic_pt_updating_helper.cpp \ dynamic_pt_updating_helper.cpp \
dynamic_pt_writing_utils.cpp \ dynamic_pt_writing_utils.cpp \
patricia_trie_reading_utils.cpp \ patricia_trie_reading_utils.cpp \
shortcut/shortcut_list_reading_utils.cpp ) \ shortcut/shortcut_list_reading_utils.cpp) \
$(addprefix suggest/policyimpl/dictionary/structure/v2/, \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \
patricia_trie_policy.cpp \ patricia_trie_policy.cpp \
ver2_patricia_trie_node_reader.cpp \ ver2_patricia_trie_node_reader.cpp \

View file

@ -409,9 +409,10 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
int wordCodePoints[wordLength]; int wordCodePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
// Use 1 for count to indicate the ngram has inputted. // Use 1 for count to indicate the ngram has inputted.
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(), const NgramProperty ngramProperty(ngramContext,
CodePointArrayView(wordCodePoints, wordLength).toVector(),
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */)); probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
return dictionary->addNgramEntry(&ngramContext, &ngramProperty); return dictionary->addNgramEntry(&ngramProperty);
} }
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict, static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
@ -527,12 +528,12 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
if (word0) { if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
// Use 1 for count to indicate the bigram has inputted. // Use 1 for count to indicate the bigram has inputted.
const NgramProperty ngramProperty(
CodePointArrayView(word1CodePoints, word1Length).toVector(),
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
const NgramContext ngramContext(word0CodePoints, word0Length, const NgramContext ngramContext(word0CodePoints, word0Length,
false /* isBeginningOfSentence */); false /* isBeginningOfSentence */);
dictionary->addNgramEntry(&ngramContext, &ngramProperty); const NgramProperty ngramProperty(ngramContext,
CodePointArrayView(word1CodePoints, word1Length).toVector(),
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
dictionary->addNgramEntry(&ngramProperty);
} }
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
return i + 1; return i + 1;
@ -642,11 +643,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
return false; return false;
} }
} }
const NgramContext ngramContext(wordCodePoints, wordCodePointCount,
wordProperty.getUnigramProperty()->representsBeginningOfSentence());
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) { for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramContext, if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) {
&ngramProperty)) {
LogUtils::logToJava(env, "Cannot add ngram to the new dict."); LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
return false; return false;
} }

View file

@ -140,10 +140,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints); return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
} }
bool Dictionary::addNgramEntry(const NgramContext *const ngramContext, bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) {
const NgramProperty *const ngramProperty) {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty); return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty);
} }
bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext, bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,

View file

@ -85,8 +85,7 @@ class Dictionary {
bool removeUnigramEntry(const CodePointArrayView codePoints); bool removeUnigramEntry(const CodePointArrayView codePoints);
bool addNgramEntry(const NgramContext *const ngramContext, bool addNgramEntry(const NgramProperty *const ngramProperty);
const NgramProperty *const ngramProperty);
bool removeNgramEntry(const NgramContext *const ngramContext, bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView codePoints); const CodePointArrayView codePoints);

View file

@ -21,15 +21,20 @@
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/property/historical_info.h" #include "suggest/core/dictionary/property/historical_info.h"
#include "suggest/core/session/ngram_context.h"
namespace latinime { namespace latinime {
class NgramProperty { class NgramProperty {
public: public:
NgramProperty(const std::vector<int> &&targetCodePoints, const int probability, NgramProperty(const NgramContext &ngramContext, const std::vector<int> &&targetCodePoints,
const HistoricalInfo historicalInfo) const int probability, const HistoricalInfo historicalInfo)
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability), : mNgramContext(ngramContext), mTargetCodePoints(std::move(targetCodePoints)),
mHistoricalInfo(historicalInfo) {} mProbability(probability), mHistoricalInfo(historicalInfo) {}
const NgramContext *getNgramContext() const {
return &mNgramContext;
}
const std::vector<int> *getTargetCodePoints() const { const std::vector<int> *getTargetCodePoints() const {
return &mTargetCodePoints; return &mTargetCodePoints;
@ -48,6 +53,7 @@ class NgramProperty {
DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty); DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty);
DISALLOW_ASSIGNMENT_OPERATOR(NgramProperty); DISALLOW_ASSIGNMENT_OPERATOR(NgramProperty);
const NgramContext mNgramContext;
const std::vector<int> mTargetCodePoints; const std::vector<int> mTargetCodePoints;
const int mProbability; const int mProbability;
const HistoricalInfo mHistoricalInfo; const HistoricalInfo mHistoricalInfo;

View file

@ -34,9 +34,9 @@ class WordProperty {
: mCodePoints(), mUnigramProperty(), mNgrams() {} : mCodePoints(), mUnigramProperty(), mNgrams() {}
WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty, WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty,
const std::vector<NgramProperty> *const bigrams) const std::vector<NgramProperty> *const ngrams)
: mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty),
mNgrams(*bigrams) {} mNgrams(*ngrams) {}
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,

View file

@ -40,7 +40,6 @@ class UnigramProperty;
* This class abstracts the structure of dictionaries. * This class abstracts the structure of dictionaries.
* Implement this policy to support additional dictionaries. * Implement this policy to support additional dictionaries.
*/ */
// TODO: Use word id instead of terminal PtNode position.
class DictionaryStructureWithBufferPolicy { class DictionaryStructureWithBufferPolicy {
public: public:
typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr; typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
@ -81,8 +80,7 @@ class DictionaryStructureWithBufferPolicy {
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0; virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
// Returns whether the update was success or not. // Returns whether the update was success or not.
virtual bool addNgramEntry(const NgramContext *const ngramContext, virtual bool addNgramEntry(const NgramProperty *const ngramProperty) = 0;
const NgramProperty *const ngramProperty) = 0;
// Returns whether the update was success or not. // Returns whether the update was success or not.
virtual bool removeNgramEntry(const NgramContext *const ngramContext, virtual bool removeNgramEntry(const NgramContext *const ngramContext,
@ -106,7 +104,6 @@ class DictionaryStructureWithBufferPolicy {
virtual void getProperty(const char *const query, const int queryLength, char *const outResult, virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
const int maxResultLength) = 0; const int maxResultLength) = 0;
// Used for testing.
virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0; virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0;
// Method to iterate all words in the dictionary. // Method to iterate all words in the dictionary.

View file

@ -0,0 +1,123 @@
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "suggest/core/session/ngram_context.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
namespace latinime {
// Constructs a context that carries no previous-word information.
// Every slot is reset explicitly: isValid() inspects slot 0 regardless of mPrevWordCount, so
// leaving the arrays untouched would make it read indeterminate values.
NgramContext::NgramContext() : mPrevWordCount(0) {
    clear();
}
// Copy constructor. Duplicates the first mPrevWordCount slots of the given context.
// All slots are reset before copying so that the slots past mPrevWordCount hold defined values;
// isValid() reads slot 0 even when mPrevWordCount is 0 (e.g. a copy of an empty context).
NgramContext::NgramContext(const NgramContext &ngramContext)
        : mPrevWordCount(ngramContext.mPrevWordCount) {
    clear();
    for (size_t i = 0; i < mPrevWordCount; ++i) {
        mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i];
        // Only the used prefix of each word buffer is copied.
        memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i],
                sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
        mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i];
    }
}
// Builds a context from several previous words. At most NELEMS(mPrevWordCodePoints) words are
// taken; any further words are ignored.
// A word whose code point count is negative or larger than MAX_WORD_LENGTH is not stored, so its
// slot keeps the cleared state (count 0, not beginning-of-sentence).
NgramContext::NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
        const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
        const size_t prevWordCount)
        : mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
    clear();
    for (size_t wordIndex = 0; wordIndex < mPrevWordCount; ++wordIndex) {
        const int codePointCount = prevWordCodePointCount[wordIndex];
        const bool hasUsableLength = codePointCount >= 0 && codePointCount <= MAX_WORD_LENGTH;
        if (hasUsableLength) {
            memmove(mPrevWordCodePoints[wordIndex], prevWordCodePoints[wordIndex],
                    sizeof(mPrevWordCodePoints[wordIndex][0]) * codePointCount);
            mPrevWordCodePointCount[wordIndex] = codePointCount;
            mIsBeginningOfSentence[wordIndex] = isBeginningOfSentence[wordIndex];
        }
    }
}
// Builds a context from a single previous word.
// When the word pointer is null or the code point count is outside [0, MAX_WORD_LENGTH], nothing
// is stored and slot 0 stays cleared; mPrevWordCount is still 1 in that case.
NgramContext::NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
        const bool isBeginningOfSentence) : mPrevWordCount(1) {
    clear();
    // A negative count must be rejected as well: it would be converted to a huge size_t in the
    // memmove() length below. This mirrors the range check in the multi-word constructor.
    if (prevWordCodePointCount < 0 || prevWordCodePointCount > MAX_WORD_LENGTH
            || !prevWordCodePoints) {
        return;
    }
    memmove(mPrevWordCodePoints[0], prevWordCodePoints,
            sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
    mPrevWordCodePointCount[0] = prevWordCodePointCount;
    mIsBeginningOfSentence[0] = isBeginningOfSentence;
}
// Reports whether the first slot is usable: it either holds at least one code point or is
// flagged as a beginning-of-sentence.
bool NgramContext::isValid() const {
    const bool hasFirstPrevWord = mPrevWordCodePointCount[0] > 0;
    return hasFirstPrevWord || mIsBeginningOfSentence[0];
}
// Returns a view over the code points of the n-th previous word (n is 1-indexed).
// An empty view is returned when n is 0 or greater than the number of previous words.
const CodePointArrayView NgramContext::getNthPrevWordCodePoints(const size_t n) const {
    const bool isInRange = n >= 1 && n <= mPrevWordCount;
    if (!isInRange) {
        return CodePointArrayView();
    }
    const size_t slot = n - 1;
    return CodePointArrayView(mPrevWordCodePoints[slot], mPrevWordCodePointCount[slot]);
}
// Returns the beginning-of-sentence flag of the n-th previous word (n is 1-indexed).
// Yields false when n is 0 or greater than the number of previous words.
bool NgramContext::isNthPrevWordBeginningOfSentence(const size_t n) const {
    // Short-circuit evaluation keeps the array access behind the range check.
    return n >= 1 && n <= mPrevWordCount && mIsBeginningOfSentence[n - 1];
}
// Looks up the id of a word in the given dictionary structure policy.
//
// dictStructurePolicy: policy used for the lookup; NOT_A_WORD_ID is returned when it is null.
// wordCodePoints / wordCodePointCount: the word to look up; NOT_A_WORD_ID is returned when the
//         pointer is null or the count is outside [0, MAX_WORD_LENGTH].
// isBeginningOfSentence: when true, the word is passed through
//         CharUtils::attachBeginningOfSentenceMarker() before the lookup.
// tryLowerCaseSearch: when true and the first lookup fails, the lookup is repeated with
//         forceLowerCaseSearch enabled.
//
// Returns the word id reported by the policy, or NOT_A_WORD_ID.
/* static */ int NgramContext::getWordId(
        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
        const int *const wordCodePoints, const int wordCodePointCount,
        const bool isBeginningOfSentence, const bool tryLowerCaseSearch) {
    // The count is used as a memmove() length into a fixed-size stack buffer, so both bounds are
    // checked; a negative value would otherwise be converted to a huge size_t.
    if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount < 0
            || wordCodePointCount > MAX_WORD_LENGTH) {
        return NOT_A_WORD_ID;
    }
    int codePoints[MAX_WORD_LENGTH];
    int codePointCount = wordCodePointCount;
    memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
    if (isBeginningOfSentence) {
        codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, codePointCount,
                MAX_WORD_LENGTH);
        // A non-positive count after attaching the marker is treated as a failure.
        if (codePointCount <= 0) {
            return NOT_A_WORD_ID;
        }
    }
    const CodePointArrayView codePointArrayView(codePoints, codePointCount);
    const int wordId = dictStructurePolicy->getWordId(codePointArrayView,
            false /* forceLowerCaseSearch */);
    if (wordId != NOT_A_WORD_ID || !tryLowerCaseSearch) {
        // Return the id when the word was found or when lower case search is not requested.
        return wordId;
    }
    // Check bigrams for lower-cased previous word if original was not found. Useful for
    // auto-capitalized words like "The [current_word]".
    return dictStructurePolicy->getWordId(codePointArrayView, true /* forceLowerCaseSearch */);
}
// Resets every previous-word slot to "empty": zero code points and not a beginning-of-sentence.
// The code point buffers themselves are left untouched.
void NgramContext::clear() {
    const size_t slotCount = NELEMS(mPrevWordCodePoints);
    for (size_t slot = 0; slot != slotCount; ++slot) {
        mIsBeginningOfSentence[slot] = false;
        mPrevWordCodePointCount[slot] = 0;
    }
}
} // namespace latinime

View file

@ -20,145 +20,54 @@
#include <array> #include <array>
#include "defines.h" #include "defines.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
#include "utils/char_utils.h"
#include "utils/int_array_view.h" #include "utils/int_array_view.h"
namespace latinime { namespace latinime {
// Rename to NgramContext. class DictionaryStructureWithBufferPolicy;
class NgramContext { class NgramContext {
public: public:
// No prev word information. // No prev word information.
NgramContext() : mPrevWordCount(0) { NgramContext();
clear(); // Copy constructor to use this class with std::vector and use this class as a return value.
} NgramContext(const NgramContext &ngramContext);
NgramContext(const NgramContext &ngramContext)
: mPrevWordCount(ngramContext.mPrevWordCount) {
for (size_t i = 0; i < mPrevWordCount; ++i) {
mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i];
memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i],
sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i];
}
}
// Construct from previous words. // Construct from previous words.
NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH], NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
const size_t prevWordCount) const size_t prevWordCount);
: mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
clear();
for (size_t i = 0; i < mPrevWordCount; ++i) {
if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) {
continue;
}
memmove(mPrevWordCodePoints[i], prevWordCodePoints[i],
sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]);
mPrevWordCodePointCount[i] = prevWordCodePointCount[i];
mIsBeginningOfSentence[i] = isBeginningOfSentence[i];
}
}
// Construct from a previous word. // Construct from a previous word.
NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount, NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
const bool isBeginningOfSentence) : mPrevWordCount(1) { const bool isBeginningOfSentence);
clear();
if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
return;
}
memmove(mPrevWordCodePoints[0], prevWordCodePoints,
sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount);
mPrevWordCodePointCount[0] = prevWordCodePointCount;
mIsBeginningOfSentence[0] = isBeginningOfSentence;
}
size_t getPrevWordCount() const { size_t getPrevWordCount() const {
return mPrevWordCount; return mPrevWordCount;
} }
bool isValid() const;
// TODO: Remove.
const NgramContext getTrimmedNgramContext(const size_t maxPrevWordCount) const {
return NgramContext(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
std::min(mPrevWordCount, maxPrevWordCount));
}
bool isValid() const {
if (mPrevWordCodePointCount[0] > 0) {
return true;
}
if (mIsBeginningOfSentence[0]) {
return true;
}
return false;
}
template<size_t N> template<size_t N>
const WordIdArrayView getPrevWordIds( const WordIdArrayView getPrevWordIds(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
std::array<int, N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const { WordIdArray<N> *const prevWordIdBuffer, const bool tryLowerCaseSearch) const {
for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) { for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) {
prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i],
mPrevWordCodePoints[i], mPrevWordCodePointCount[i], mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch);
mIsBeginningOfSentence[i], tryLowerCaseSearch);
} }
return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount); return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount);
} }
// n is 1-indexed. // n is 1-indexed.
const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const { const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const;
if (n <= 0 || n > mPrevWordCount) {
return CodePointArrayView();
}
return CodePointArrayView(mPrevWordCodePoints[n - 1], mPrevWordCodePointCount[n - 1]);
}
// n is 1-indexed. // n is 1-indexed.
bool isNthPrevWordBeginningOfSentence(const size_t n) const { bool isNthPrevWordBeginningOfSentence(const size_t n) const;
if (n <= 0 || n > mPrevWordCount) {
return false;
}
return mIsBeginningOfSentence[n - 1];
}
private: private:
DISALLOW_ASSIGNMENT_OPERATOR(NgramContext); DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount, const int *const wordCodePoints, const int wordCodePointCount,
const bool isBeginningOfSentence, const bool tryLowerCaseSearch) { const bool isBeginningOfSentence, const bool tryLowerCaseSearch);
if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { void clear();
return NOT_A_WORD_ID;
}
int codePoints[MAX_WORD_LENGTH];
int codePointCount = wordCodePointCount;
memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
if (isBeginningOfSentence) {
codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
codePointCount, MAX_WORD_LENGTH);
if (codePointCount <= 0) {
return NOT_A_WORD_ID;
}
}
const CodePointArrayView codePointArrayView(codePoints, codePointCount);
const int wordId = dictStructurePolicy->getWordId(
codePointArrayView, false /* forceLowerCaseSearch */);
if (wordId != NOT_A_WORD_ID || !tryLowerCaseSearch) {
// Return the id when when the word was found or doesn't try lower case search.
return wordId;
}
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
return dictStructurePolicy->getWordId(codePointArrayView, true /* forceLowerCaseSearch */);
}
void clear() {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
mPrevWordCodePointCount[i] = 0;
mIsBeginningOfSentence[i] = false;
}
}
const size_t mPrevWordCount; const size_t mPrevWordCount;
int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];

View file

@ -344,8 +344,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return mNodeWriter.suppressUnigramEntry(&ptNodeParams); return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
} }
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext, bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false; return false;
@ -355,6 +354,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
mDictBuffer->getTailPosition()); mDictBuffer->getTailPosition());
return false; return false;
} }
const NgramContext *const ngramContext = ngramProperty->getNgramContext();
if (!ngramContext->isValid()) { if (!ngramContext->isValid()) {
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary."); AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false; return false;
@ -463,9 +463,9 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
} }
const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */) const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
? NOT_A_PROBABILITY : probability; ? NOT_A_PROBABILITY : probability;
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram, const NgramProperty ngramProperty(*ngramContext, wordCodePoints.toVector(), probabilityForNgram,
historicalInfo); historicalInfo);
if (!addNgramEntry(ngramContext, &ngramProperty)) { if (!addNgramEntry(&ngramProperty)) {
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext()."); AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
return false; return false;
} }
@ -585,6 +585,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
bigramEntry.getHistoricalInfo(), mHeaderPolicy) : bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
bigramEntry.getProbability(); bigramEntry.getProbability();
ngrams.emplace_back( ngrams.emplace_back(
NgramContext(wordCodePoints.data(), wordCodePoints.size(),
ptNodeParams.representsBeginningOfSentence()),
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
probability, *historicalInfo); probability, *historicalInfo);
} }

View file

@ -113,8 +113,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
bool addNgramEntry(const NgramContext *const ngramContext, bool addNgramEntry(const NgramProperty *const ngramProperty);
const NgramProperty *const ngramProperty);
bool removeNgramEntry(const NgramContext *const ngramContext, bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints); const CodePointArrayView wordCodePoints);

View file

@ -451,6 +451,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
bigramWord1CodePoints, &word1Probability); bigramWord1CodePoints, &word1Probability);
const int probability = getProbability(word1Probability, bigramsIt.getProbability()); const int probability = getProbability(word1Probability, bigramsIt.getProbability());
ngrams.emplace_back( ngrams.emplace_back(
NgramContext(wordCodePoints.data(), wordCodePoints.size(),
ptNodeParams.representsBeginningOfSentence()),
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(), CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
probability, HistoricalInfo()); probability, HistoricalInfo());
} }

View file

@ -93,8 +93,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
return false; return false;
} }
bool addNgramEntry(const NgramContext *const ngramContext, bool addNgramEntry(const NgramProperty *const ngramProperty) {
const NgramProperty *const ngramProperty) {
// This method should not be called for non-updatable dictionary. // This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false; return false;

View file

@ -264,8 +264,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
return true; return true;
} }
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext, bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
const NgramProperty *const ngramProperty) {
if (!mBuffers->isUpdatable()) { if (!mBuffers->isUpdatable()) {
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
return false; return false;
@ -275,6 +274,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
mDictBuffer->getTailPosition()); mDictBuffer->getTailPosition());
return false; return false;
} }
const NgramContext *const ngramContext = ngramProperty->getNgramContext();
if (!ngramContext->isValid()) { if (!ngramContext->isValid()) {
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary."); AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
return false; return false;
@ -453,7 +453,8 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
// Needs to reduce dictionary size. // Needs to reduce dictionary size.
return true; return true;
} else if (mHeaderPolicy->isDecayingDict()) { } else if (mHeaderPolicy->isDecayingDict()) {
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy); return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(),
mHeaderPolicy);
} }
return false; return false;
} }
@ -503,12 +504,16 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
prevWordIds)) { prevWordIds)) {
const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getWordId(), const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getWordId(),
MAX_WORD_LENGTH, bigramWord1CodePoints); MAX_WORD_LENGTH, bigramWord1CodePoints);
const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry(); const ProbabilityEntry ngramProbabilityEntry = entry.getProbabilityEntry();
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); const HistoricalInfo *const historicalInfo = ngramProbabilityEntry.getHistoricalInfo();
const int probability = probabilityEntry.hasHistoricalInfo() ? const int probability = ngramProbabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) : ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
probabilityEntry.getProbability(); ngramProbabilityEntry.getProbability();
ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), ngrams.emplace_back(
NgramContext(
wordCodePoints.data(), wordCodePoints.size(),
probabilityEntry.representsBeginningOfSentence()),
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
probability, *historicalInfo); probability, *historicalInfo);
} }
// Fetch shortcut information. // Fetch shortcut information.

View file

@ -92,8 +92,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
bool removeUnigramEntry(const CodePointArrayView wordCodePoints); bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
bool addNgramEntry(const NgramContext *const ngramContext, bool addNgramEntry(const NgramProperty *const ngramProperty);
const NgramProperty *const ngramProperty);
bool removeNgramEntry(const NgramContext *const ngramContext, bool removeNgramEntry(const NgramContext *const ngramContext,
const CodePointArrayView wordCodePoints); const CodePointArrayView wordCodePoints);