Make NgramProperty have NgramContext.
Bug: 14425059 Change-Id: I210acb816b122857dbbe1ee4dd6a35c5335bf2bf
This commit is contained in:
parent
f87bb77a91
commit
88bb28c132
12 changed files with 48 additions and 43 deletions
|
@ -409,9 +409,10 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
|
|||
int wordCodePoints[wordLength];
|
||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||
// Use 1 for count to indicate the ngram has been inputted.
|
||||
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
||||
const NgramProperty ngramProperty(ngramContext,
|
||||
CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
||||
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||
return dictionary->addNgramEntry(&ngramContext, &ngramProperty);
|
||||
return dictionary->addNgramEntry(&ngramProperty);
|
||||
}
|
||||
|
||||
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||
|
@ -526,12 +527,12 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
|||
if (word0) {
|
||||
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
|
||||
// Use 1 for count to indicate the bigram has been inputted.
|
||||
const NgramProperty ngramProperty(
|
||||
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
||||
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||
const NgramContext ngramContext(word0CodePoints, word0Length,
|
||||
false /* isBeginningOfSentence */);
|
||||
dictionary->addNgramEntry(&ngramContext, &ngramProperty);
|
||||
const NgramProperty ngramProperty(ngramContext,
|
||||
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
||||
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||
dictionary->addNgramEntry(&ngramProperty);
|
||||
}
|
||||
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
|
||||
return i + 1;
|
||||
|
@ -641,11 +642,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
|||
return false;
|
||||
}
|
||||
}
|
||||
const NgramContext ngramContext(wordCodePoints, wordCodePointCount,
|
||||
wordProperty.getUnigramProperty()->representsBeginningOfSentence());
|
||||
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
|
||||
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramContext,
|
||||
&ngramProperty)) {
|
||||
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) {
|
||||
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -140,10 +140,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
|
|||
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
|
||||
}
|
||||
|
||||
bool Dictionary::addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty) {
|
||||
bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) {
|
||||
TimeKeeper::setCurrentTime();
|
||||
return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty);
|
||||
return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty);
|
||||
}
|
||||
|
||||
bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
|
||||
|
|
|
@ -85,8 +85,7 @@ class Dictionary {
|
|||
|
||||
bool removeUnigramEntry(const CodePointArrayView codePoints);
|
||||
|
||||
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty);
|
||||
bool addNgramEntry(const NgramProperty *const ngramProperty);
|
||||
|
||||
bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||
const CodePointArrayView codePoints);
|
||||
|
|
|
@ -21,15 +21,20 @@
|
|||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/property/historical_info.h"
|
||||
#include "suggest/core/session/ngram_context.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
class NgramProperty {
|
||||
public:
|
||||
NgramProperty(const std::vector<int> &&targetCodePoints, const int probability,
|
||||
const HistoricalInfo historicalInfo)
|
||||
: mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability),
|
||||
mHistoricalInfo(historicalInfo) {}
|
||||
NgramProperty(const NgramContext &ngramContext, const std::vector<int> &&targetCodePoints,
|
||||
const int probability, const HistoricalInfo historicalInfo)
|
||||
: mNgramContext(ngramContext), mTargetCodePoints(std::move(targetCodePoints)),
|
||||
mProbability(probability), mHistoricalInfo(historicalInfo) {}
|
||||
|
||||
const NgramContext *getNgramContext() const {
|
||||
return &mNgramContext;
|
||||
}
|
||||
|
||||
const std::vector<int> *getTargetCodePoints() const {
|
||||
return &mTargetCodePoints;
|
||||
|
@ -48,6 +53,7 @@ class NgramProperty {
|
|||
DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty);
|
||||
DISALLOW_ASSIGNMENT_OPERATOR(NgramProperty);
|
||||
|
||||
const NgramContext mNgramContext;
|
||||
const std::vector<int> mTargetCodePoints;
|
||||
const int mProbability;
|
||||
const HistoricalInfo mHistoricalInfo;
|
||||
|
|
|
@ -34,9 +34,9 @@ class WordProperty {
|
|||
: mCodePoints(), mUnigramProperty(), mNgrams() {}
|
||||
|
||||
WordProperty(const std::vector<int> &&codePoints, const UnigramProperty *const unigramProperty,
|
||||
const std::vector<NgramProperty> *const bigrams)
|
||||
const std::vector<NgramProperty> *const ngrams)
|
||||
: mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty),
|
||||
mNgrams(*bigrams) {}
|
||||
mNgrams(*ngrams) {}
|
||||
|
||||
void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags,
|
||||
jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities,
|
||||
|
|
|
@ -40,7 +40,6 @@ class UnigramProperty;
|
|||
* This class abstracts the structure of dictionaries.
|
||||
* Implement this policy to support additional dictionaries.
|
||||
*/
|
||||
// TODO: Use word id instead of terminal PtNode position.
|
||||
class DictionaryStructureWithBufferPolicy {
|
||||
public:
|
||||
typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;
|
||||
|
@ -81,8 +80,7 @@ class DictionaryStructureWithBufferPolicy {
|
|||
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
|
||||
|
||||
// Returns whether the update was successful or not.
|
||||
virtual bool addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty) = 0;
|
||||
virtual bool addNgramEntry(const NgramProperty *const ngramProperty) = 0;
|
||||
|
||||
// Returns whether the update was successful or not.
|
||||
virtual bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||
|
@ -106,7 +104,6 @@ class DictionaryStructureWithBufferPolicy {
|
|||
virtual void getProperty(const char *const query, const int queryLength, char *const outResult,
|
||||
const int maxResultLength) = 0;
|
||||
|
||||
// Used for testing.
|
||||
virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0;
|
||||
|
||||
// Method to iterate all words in the dictionary.
|
||||
|
|
|
@ -344,8 +344,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
|
|||
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty) {
|
||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
@ -355,6 +354,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
|
|||
mDictBuffer->getTailPosition());
|
||||
return false;
|
||||
}
|
||||
const NgramContext *const ngramContext = ngramProperty->getNgramContext();
|
||||
if (!ngramContext->isValid()) {
|
||||
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
|
||||
return false;
|
||||
|
@ -463,9 +463,9 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
|||
}
|
||||
const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||
? NOT_A_PROBABILITY : probability;
|
||||
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
|
||||
const NgramProperty ngramProperty(*ngramContext, wordCodePoints.toVector(), probabilityForNgram,
|
||||
historicalInfo);
|
||||
if (!addNgramEntry(ngramContext, &ngramProperty)) {
|
||||
if (!addNgramEntry(&ngramProperty)) {
|
||||
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
|
||||
return false;
|
||||
}
|
||||
|
@ -585,6 +585,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
bigramEntry.getHistoricalInfo(), mHeaderPolicy) :
|
||||
bigramEntry.getProbability();
|
||||
ngrams.emplace_back(
|
||||
NgramContext(wordCodePoints.data(), wordCodePoints.size(),
|
||||
ptNodeParams.representsBeginningOfSentence()),
|
||||
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||
probability, *historicalInfo);
|
||||
}
|
||||
|
|
|
@ -113,8 +113,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
||||
|
||||
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty);
|
||||
bool addNgramEntry(const NgramProperty *const ngramProperty);
|
||||
|
||||
bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||
const CodePointArrayView wordCodePoints);
|
||||
|
|
|
@ -451,6 +451,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty(
|
|||
bigramWord1CodePoints, &word1Probability);
|
||||
const int probability = getProbability(word1Probability, bigramsIt.getProbability());
|
||||
ngrams.emplace_back(
|
||||
NgramContext(wordCodePoints.data(), wordCodePoints.size(),
|
||||
ptNodeParams.representsBeginningOfSentence()),
|
||||
CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(),
|
||||
probability, HistoricalInfo());
|
||||
}
|
||||
|
|
|
@ -93,8 +93,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty) {
|
||||
bool addNgramEntry(const NgramProperty *const ngramProperty) {
|
||||
// This method should not be called for non-updatable dictionary.
|
||||
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
|
|
@ -264,8 +264,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty) {
|
||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) {
|
||||
if (!mBuffers->isUpdatable()) {
|
||||
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
||||
return false;
|
||||
|
@ -275,6 +274,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex
|
|||
mDictBuffer->getTailPosition());
|
||||
return false;
|
||||
}
|
||||
const NgramContext *const ngramContext = ngramProperty->getNgramContext();
|
||||
if (!ngramContext->isValid()) {
|
||||
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
|
||||
return false;
|
||||
|
@ -451,7 +451,8 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
|
|||
// Needs to reduce dictionary size.
|
||||
return true;
|
||||
} else if (mHeaderPolicy->isDecayingDict()) {
|
||||
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy);
|
||||
return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(),
|
||||
mHeaderPolicy);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -501,12 +502,16 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty(
|
|||
prevWordIds)) {
|
||||
const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getWordId(),
|
||||
MAX_WORD_LENGTH, bigramWord1CodePoints);
|
||||
const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry();
|
||||
const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo();
|
||||
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
||||
const ProbabilityEntry ngramProbabilityEntry = entry.getProbabilityEntry();
|
||||
const HistoricalInfo *const historicalInfo = ngramProbabilityEntry.getHistoricalInfo();
|
||||
const int probability = ngramProbabilityEntry.hasHistoricalInfo() ?
|
||||
ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) :
|
||||
probabilityEntry.getProbability();
|
||||
ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||
ngramProbabilityEntry.getProbability();
|
||||
ngrams.emplace_back(
|
||||
NgramContext(
|
||||
wordCodePoints.data(), wordCodePoints.size(),
|
||||
probabilityEntry.representsBeginningOfSentence()),
|
||||
CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(),
|
||||
probability, *historicalInfo);
|
||||
}
|
||||
// Fetch shortcut information.
|
||||
|
|
|
@ -92,8 +92,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
||||
|
||||
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||
const NgramProperty *const ngramProperty);
|
||||
bool addNgramEntry(const NgramProperty *const ngramProperty);
|
||||
|
||||
bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||
const CodePointArrayView wordCodePoints);
|
||||
|
|
Loading…
Reference in a new issue