Merge "Rename PrevWordsInfo to NgramContext."
This commit is contained in:
commit
10fa30e380
15 changed files with 114 additions and 114 deletions
|
@ -242,15 +242,15 @@ static void latinime_BinaryDictionary_getSuggestions(JNIEnv *env, jclass clazz,
|
||||||
env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */,
|
env->GetFloatArrayRegion(inOutWeightOfLangModelVsSpatialModel, 0, 1 /* len */,
|
||||||
&weightOfLangModelVsSpatialModel);
|
&weightOfLangModelVsSpatialModel);
|
||||||
SuggestionResults suggestionResults(MAX_RESULTS);
|
SuggestionResults suggestionResults(MAX_RESULTS);
|
||||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
|
||||||
prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount);
|
prevWordCodePointArrays, isBeginningOfSentenceArray, prevWordCount);
|
||||||
if (givenSuggestOptions.isGesture() || inputSize > 0) {
|
if (givenSuggestOptions.isGesture() || inputSize > 0) {
|
||||||
// TODO: Use SuggestionResults to return suggestions.
|
// TODO: Use SuggestionResults to return suggestions.
|
||||||
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
|
dictionary->getSuggestions(pInfo, traverseSession, xCoordinates, yCoordinates,
|
||||||
times, pointerIds, inputCodePoints, inputSize, &prevWordsInfo,
|
times, pointerIds, inputCodePoints, inputSize, &ngramContext,
|
||||||
&givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults);
|
&givenSuggestOptions, weightOfLangModelVsSpatialModel, &suggestionResults);
|
||||||
} else {
|
} else {
|
||||||
dictionary->getPredictions(&prevWordsInfo, &suggestionResults);
|
dictionary->getPredictions(&ngramContext, &suggestionResults);
|
||||||
}
|
}
|
||||||
if (DEBUG_DICT) {
|
if (DEBUG_DICT) {
|
||||||
suggestionResults.dumpSuggestions();
|
suggestionResults.dumpSuggestions();
|
||||||
|
@ -289,10 +289,10 @@ static jint latinime_BinaryDictionary_getNgramProbability(JNIEnv *env, jclass cl
|
||||||
const jsize wordLength = env->GetArrayLength(word);
|
const jsize wordLength = env->GetArrayLength(word);
|
||||||
int wordCodePoints[wordLength];
|
int wordCodePoints[wordLength];
|
||||||
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints);
|
||||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
|
||||||
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
||||||
env->GetArrayLength(prevWordCodePointArrays));
|
env->GetArrayLength(prevWordCodePointArrays));
|
||||||
return dictionary->getNgramProbability(&prevWordsInfo,
|
return dictionary->getNgramProbability(&ngramContext,
|
||||||
CodePointArrayView(wordCodePoints, wordLength));
|
CodePointArrayView(wordCodePoints, wordLength));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -402,7 +402,7 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
|
||||||
if (!dictionary) {
|
if (!dictionary) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
|
||||||
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
||||||
env->GetArrayLength(prevWordCodePointArrays));
|
env->GetArrayLength(prevWordCodePointArrays));
|
||||||
jsize wordLength = env->GetArrayLength(word);
|
jsize wordLength = env->GetArrayLength(word);
|
||||||
|
@ -411,7 +411,7 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j
|
||||||
// Use 1 for count to indicate the ngram has inputted.
|
// Use 1 for count to indicate the ngram has inputted.
|
||||||
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(),
|
||||||
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||||
return dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
return dictionary->addNgramEntry(&ngramContext, &ngramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict,
|
||||||
|
@ -421,13 +421,13 @@ static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz
|
||||||
if (!dictionary) {
|
if (!dictionary) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
|
||||||
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
||||||
env->GetArrayLength(prevWordCodePointArrays));
|
env->GetArrayLength(prevWordCodePointArrays));
|
||||||
jsize codePointCount = env->GetArrayLength(word);
|
jsize codePointCount = env->GetArrayLength(word);
|
||||||
int wordCodePoints[codePointCount];
|
int wordCodePoints[codePointCount];
|
||||||
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
|
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
|
||||||
return dictionary->removeNgramEntry(&prevWordsInfo,
|
return dictionary->removeNgramEntry(&ngramContext,
|
||||||
CodePointArrayView(wordCodePoints, codePointCount));
|
CodePointArrayView(wordCodePoints, codePointCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -439,14 +439,14 @@ static bool latinime_BinaryDictionary_updateEntriesForWordWithNgramContext(JNIEn
|
||||||
if (!dictionary) {
|
if (!dictionary) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const PrevWordsInfo prevWordsInfo = JniDataUtils::constructPrevWordsInfo(env,
|
const NgramContext ngramContext = JniDataUtils::constructNgramContext(env,
|
||||||
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
prevWordCodePointArrays, isBeginningOfSentenceArray,
|
||||||
env->GetArrayLength(prevWordCodePointArrays));
|
env->GetArrayLength(prevWordCodePointArrays));
|
||||||
jsize codePointCount = env->GetArrayLength(word);
|
jsize codePointCount = env->GetArrayLength(word);
|
||||||
int wordCodePoints[codePointCount];
|
int wordCodePoints[codePointCount];
|
||||||
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
|
env->GetIntArrayRegion(word, 0, codePointCount, wordCodePoints);
|
||||||
const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
|
const HistoricalInfo historicalInfo(timestamp, 0 /* level */, count);
|
||||||
return dictionary->updateEntriesForWordWithNgramContext(&prevWordsInfo,
|
return dictionary->updateEntriesForWordWithNgramContext(&ngramContext,
|
||||||
CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
|
CodePointArrayView(wordCodePoints, codePointCount), isValidWord == JNI_TRUE,
|
||||||
historicalInfo);
|
historicalInfo);
|
||||||
}
|
}
|
||||||
|
@ -529,9 +529,9 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
|
||||||
const NgramProperty ngramProperty(
|
const NgramProperty ngramProperty(
|
||||||
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
CodePointArrayView(word1CodePoints, word1Length).toVector(),
|
||||||
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */));
|
||||||
const PrevWordsInfo prevWordsInfo(word0CodePoints, word0Length,
|
const NgramContext ngramContext(word0CodePoints, word0Length,
|
||||||
false /* isBeginningOfSentence */);
|
false /* isBeginningOfSentence */);
|
||||||
dictionary->addNgramEntry(&prevWordsInfo, &ngramProperty);
|
dictionary->addNgramEntry(&ngramContext, &ngramProperty);
|
||||||
}
|
}
|
||||||
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
|
if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
return i + 1;
|
return i + 1;
|
||||||
|
@ -641,10 +641,10 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
const PrevWordsInfo prevWordsInfo(wordCodePoints, wordCodePointCount,
|
const NgramContext ngramContext(wordCodePoints, wordCodePointCount,
|
||||||
wordProperty.getUnigramProperty()->representsBeginningOfSentence());
|
wordProperty.getUnigramProperty()->representsBeginningOfSentence());
|
||||||
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
|
for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) {
|
||||||
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&prevWordsInfo,
|
if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramContext,
|
||||||
&ngramProperty)) {
|
&ngramProperty)) {
|
||||||
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
|
LogUtils::logToJava(env, "Cannot add ngram to the new dict.");
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -40,14 +40,14 @@ static void latinime_initDicTraverseSession(JNIEnv *env, jclass clazz, jlong tra
|
||||||
}
|
}
|
||||||
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
|
Dictionary *dict = reinterpret_cast<Dictionary *>(dictionary);
|
||||||
if (!previousWord) {
|
if (!previousWord) {
|
||||||
PrevWordsInfo prevWordsInfo;
|
NgramContext emptyNgramContext;
|
||||||
ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
|
ts->init(dict, &emptyNgramContext, 0 /* suggestOptions */);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
int prevWord[previousWordLength];
|
int prevWord[previousWordLength];
|
||||||
env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord);
|
env->GetIntArrayRegion(previousWord, 0, previousWordLength, prevWord);
|
||||||
PrevWordsInfo prevWordsInfo(prevWord, previousWordLength, false /* isStartOfSentence */);
|
NgramContext ngramContext(prevWord, previousWordLength, false /* isStartOfSentence */);
|
||||||
ts->init(dict, &prevWordsInfo, 0 /* suggestOptions */);
|
ts->init(dict, &ngramContext, 0 /* suggestOptions */);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {
|
static void latinime_releaseDicTraverseSession(JNIEnv *env, jclass clazz, jlong traverseSession) {
|
||||||
|
|
|
@ -46,11 +46,11 @@ Dictionary::Dictionary(JNIEnv *env, DictionaryStructureWithBufferPolicy::Structu
|
||||||
|
|
||||||
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
|
void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
|
||||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
|
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
|
||||||
int inputSize, const PrevWordsInfo *const prevWordsInfo,
|
int inputSize, const NgramContext *const ngramContext,
|
||||||
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
|
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
|
||||||
SuggestionResults *const outSuggestionResults) const {
|
SuggestionResults *const outSuggestionResults) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
traverseSession->init(this, prevWordsInfo, suggestOptions);
|
traverseSession->init(this, ngramContext, suggestOptions);
|
||||||
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
|
const auto &suggest = suggestOptions->isGesture() ? mGestureSuggest : mTypingSuggest;
|
||||||
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
suggest->getSuggestions(proximityInfo, traverseSession, xcoordinates,
|
||||||
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
ycoordinates, times, pointerIds, inputCodePoints, inputSize,
|
||||||
|
@ -58,10 +58,10 @@ void Dictionary::getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession
|
||||||
}
|
}
|
||||||
|
|
||||||
Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
|
Dictionary::NgramListenerForPrediction::NgramListenerForPrediction(
|
||||||
const PrevWordsInfo *const prevWordsInfo, const WordIdArrayView prevWordIds,
|
const NgramContext *const ngramContext, const WordIdArrayView prevWordIds,
|
||||||
SuggestionResults *const suggestionResults,
|
SuggestionResults *const suggestionResults,
|
||||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy)
|
||||||
: mPrevWordsInfo(prevWordsInfo), mPrevWordIds(prevWordIds),
|
: mNgramContext(ngramContext), mPrevWordIds(prevWordIds),
|
||||||
mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
|
mSuggestionResults(suggestionResults), mDictStructurePolicy(dictStructurePolicy) {}
|
||||||
|
|
||||||
void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
|
void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbability,
|
||||||
|
@ -69,7 +69,7 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
|
||||||
if (targetWordId == NOT_A_WORD_ID) {
|
if (targetWordId == NOT_A_WORD_ID) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (mPrevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
if (mNgramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||||
&& ngramProbability == NOT_A_PROBABILITY) {
|
&& ngramProbability == NOT_A_PROBABILITY) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -85,20 +85,20 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi
|
||||||
wordAttributes.getProbability());
|
wordAttributes.getProbability());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
void Dictionary::getPredictions(const NgramContext *const ngramContext,
|
||||||
SuggestionResults *const outSuggestionResults) const {
|
SuggestionResults *const outSuggestionResults) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(
|
const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
|
||||||
mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
|
mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
|
||||||
true /* tryLowerCaseSearch */);
|
true /* tryLowerCaseSearch */);
|
||||||
NgramListenerForPrediction listener(prevWordsInfo, prevWordIds, outSuggestionResults,
|
NgramListenerForPrediction listener(ngramContext, prevWordIds, outSuggestionResults,
|
||||||
mDictionaryStructureWithBufferPolicy.get());
|
mDictionaryStructureWithBufferPolicy.get());
|
||||||
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
|
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const CodePointArrayView codePoints) const {
|
int Dictionary::getProbability(const CodePointArrayView codePoints) const {
|
||||||
return getNgramProbability(nullptr /* prevWordsInfo */, codePoints);
|
return getNgramProbability(nullptr /* ngramContext */, codePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
|
int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const {
|
||||||
|
@ -107,18 +107,18 @@ int Dictionary::getMaxProbabilityOfExactMatches(const CodePointArrayView codePoi
|
||||||
mDictionaryStructureWithBufferPolicy.get(), codePoints);
|
mDictionaryStructureWithBufferPolicy.get(), codePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
|
int Dictionary::getNgramProbability(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView codePoints) const {
|
const CodePointArrayView codePoints) const {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
|
const int wordId = mDictionaryStructureWithBufferPolicy->getWordId(codePoints,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
|
if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY;
|
||||||
if (!prevWordsInfo) {
|
if (!ngramContext) {
|
||||||
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
|
return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId);
|
||||||
}
|
}
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds
|
const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(
|
||||||
(mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
|
mDictionaryStructureWithBufferPolicy.get(), &prevWordIdArray,
|
||||||
true /* tryLowerCaseSearch */);
|
true /* tryLowerCaseSearch */);
|
||||||
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
|
return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId);
|
||||||
}
|
}
|
||||||
|
@ -140,23 +140,23 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) {
|
||||||
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
|
return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Dictionary::addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty) {
|
const NgramProperty *const ngramProperty) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->addNgramEntry(prevWordsInfo, ngramProperty);
|
return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView codePoints) {
|
const CodePointArrayView codePoints) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(prevWordsInfo, codePoints);
|
return mDictionaryStructureWithBufferPolicy->removeNgramEntry(ngramContext, codePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::updateEntriesForWordWithNgramContext(const PrevWordsInfo *const prevWordsInfo,
|
bool Dictionary::updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView codePoints, const bool isValidWord,
|
const CodePointArrayView codePoints, const bool isValidWord,
|
||||||
const HistoricalInfo historicalInfo) {
|
const HistoricalInfo historicalInfo) {
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(prevWordsInfo,
|
return mDictionaryStructureWithBufferPolicy->updateEntriesForWordWithNgramContext(ngramContext,
|
||||||
codePoints, isValidWord, historicalInfo);
|
codePoints, isValidWord, historicalInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ namespace latinime {
|
||||||
|
|
||||||
class DictionaryStructureWithBufferPolicy;
|
class DictionaryStructureWithBufferPolicy;
|
||||||
class DicTraverseSession;
|
class DicTraverseSession;
|
||||||
class PrevWordsInfo;
|
class NgramContext;
|
||||||
class ProximityInfo;
|
class ProximityInfo;
|
||||||
class SuggestionResults;
|
class SuggestionResults;
|
||||||
class SuggestOptions;
|
class SuggestOptions;
|
||||||
|
@ -66,18 +66,18 @@ class Dictionary {
|
||||||
|
|
||||||
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
|
void getSuggestions(ProximityInfo *proximityInfo, DicTraverseSession *traverseSession,
|
||||||
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
|
int *xcoordinates, int *ycoordinates, int *times, int *pointerIds, int *inputCodePoints,
|
||||||
int inputSize, const PrevWordsInfo *const prevWordsInfo,
|
int inputSize, const NgramContext *const ngramContext,
|
||||||
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
|
const SuggestOptions *const suggestOptions, const float weightOfLangModelVsSpatialModel,
|
||||||
SuggestionResults *const outSuggestionResults) const;
|
SuggestionResults *const outSuggestionResults) const;
|
||||||
|
|
||||||
void getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
void getPredictions(const NgramContext *const ngramContext,
|
||||||
SuggestionResults *const outSuggestionResults) const;
|
SuggestionResults *const outSuggestionResults) const;
|
||||||
|
|
||||||
int getProbability(const CodePointArrayView codePoints) const;
|
int getProbability(const CodePointArrayView codePoints) const;
|
||||||
|
|
||||||
int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
|
int getMaxProbabilityOfExactMatches(const CodePointArrayView codePoints) const;
|
||||||
|
|
||||||
int getNgramProbability(const PrevWordsInfo *const prevWordsInfo,
|
int getNgramProbability(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView codePoints) const;
|
const CodePointArrayView codePoints) const;
|
||||||
|
|
||||||
bool addUnigramEntry(const CodePointArrayView codePoints,
|
bool addUnigramEntry(const CodePointArrayView codePoints,
|
||||||
|
@ -85,13 +85,13 @@ class Dictionary {
|
||||||
|
|
||||||
bool removeUnigramEntry(const CodePointArrayView codePoints);
|
bool removeUnigramEntry(const CodePointArrayView codePoints);
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty);
|
const NgramProperty *const ngramProperty);
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView codePoints);
|
const CodePointArrayView codePoints);
|
||||||
|
|
||||||
bool updateEntriesForWordWithNgramContext(const PrevWordsInfo *const prevWordsInfo,
|
bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView codePoints, const bool isValidWord,
|
const CodePointArrayView codePoints, const bool isValidWord,
|
||||||
const HistoricalInfo historicalInfo);
|
const HistoricalInfo historicalInfo);
|
||||||
|
|
||||||
|
@ -123,7 +123,7 @@ class Dictionary {
|
||||||
|
|
||||||
class NgramListenerForPrediction : public NgramListener {
|
class NgramListenerForPrediction : public NgramListener {
|
||||||
public:
|
public:
|
||||||
NgramListenerForPrediction(const PrevWordsInfo *const prevWordsInfo,
|
NgramListenerForPrediction(const NgramContext *const ngramContext,
|
||||||
const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults,
|
const WordIdArrayView prevWordIds, SuggestionResults *const suggestionResults,
|
||||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy);
|
||||||
virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
|
virtual void onVisitEntry(const int ngramProbability, const int targetWordId);
|
||||||
|
@ -131,7 +131,7 @@ class Dictionary {
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(NgramListenerForPrediction);
|
||||||
|
|
||||||
const PrevWordsInfo *const mPrevWordsInfo;
|
const NgramContext *const mNgramContext;
|
||||||
const WordIdArrayView mPrevWordIds;
|
const WordIdArrayView mPrevWordIds;
|
||||||
SuggestionResults *const mSuggestionResults;
|
SuggestionResults *const mSuggestionResults;
|
||||||
const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
|
const DictionaryStructureWithBufferPolicy *const mDictStructurePolicy;
|
||||||
|
|
|
@ -33,10 +33,10 @@ namespace latinime {
|
||||||
std::vector<DicNode> current;
|
std::vector<DicNode> current;
|
||||||
std::vector<DicNode> next;
|
std::vector<DicNode> next;
|
||||||
|
|
||||||
// No prev words information.
|
// No ngram context.
|
||||||
PrevWordsInfo emptyPrevWordsInfo;
|
NgramContext emptyNgramContext;
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = emptyPrevWordsInfo.getPrevWordIds(
|
const WordIdArrayView prevWordIds = emptyNgramContext.getPrevWordIds(
|
||||||
dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
dictionaryStructurePolicy, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
||||||
current.emplace_back();
|
current.emplace_back();
|
||||||
DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
|
DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, &current.front());
|
||||||
|
|
|
@ -33,7 +33,7 @@ class DicNodeVector;
|
||||||
class DictionaryHeaderStructurePolicy;
|
class DictionaryHeaderStructurePolicy;
|
||||||
class MultiBigramMap;
|
class MultiBigramMap;
|
||||||
class NgramListener;
|
class NgramListener;
|
||||||
class PrevWordsInfo;
|
class NgramContext;
|
||||||
class UnigramProperty;
|
class UnigramProperty;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -81,15 +81,15 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
|
virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0;
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
virtual bool addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty) = 0;
|
const NgramProperty *const ngramProperty) = 0;
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
virtual bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints) = 0;
|
const CodePointArrayView wordCodePoints) = 0;
|
||||||
|
|
||||||
// Returns whether the update was success or not.
|
// Returns whether the update was success or not.
|
||||||
virtual bool updateEntriesForWordWithNgramContext(const PrevWordsInfo *const prevWordsInfo,
|
virtual bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
||||||
const HistoricalInfo historicalInfo) = 0;
|
const HistoricalInfo historicalInfo) = 0;
|
||||||
|
|
||||||
|
|
|
@ -30,12 +30,12 @@ const int DicTraverseSession::DICTIONARY_SIZE_THRESHOLD_TO_USE_LARGE_CACHE_FOR_S
|
||||||
256 * 1024;
|
256 * 1024;
|
||||||
|
|
||||||
void DicTraverseSession::init(const Dictionary *const dictionary,
|
void DicTraverseSession::init(const Dictionary *const dictionary,
|
||||||
const PrevWordsInfo *const prevWordsInfo, const SuggestOptions *const suggestOptions) {
|
const NgramContext *const ngramContext, const SuggestOptions *const suggestOptions) {
|
||||||
mDictionary = dictionary;
|
mDictionary = dictionary;
|
||||||
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
|
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
|
||||||
->getMultiWordCostMultiplier();
|
->getMultiWordCostMultiplier();
|
||||||
mSuggestOptions = suggestOptions;
|
mSuggestOptions = suggestOptions;
|
||||||
mPrevWordIdCount = prevWordsInfo->getPrevWordIds(getDictionaryStructurePolicy(),
|
mPrevWordIdCount = ngramContext->getPrevWordIds(getDictionaryStructurePolicy(),
|
||||||
&mPrevWordIdArray, true /* tryLowerCaseSearch */).size();
|
&mPrevWordIdArray, true /* tryLowerCaseSearch */).size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,7 +30,7 @@ namespace latinime {
|
||||||
|
|
||||||
class Dictionary;
|
class Dictionary;
|
||||||
class DictionaryStructureWithBufferPolicy;
|
class DictionaryStructureWithBufferPolicy;
|
||||||
class PrevWordsInfo;
|
class NgramContext;
|
||||||
class ProximityInfo;
|
class ProximityInfo;
|
||||||
class SuggestOptions;
|
class SuggestOptions;
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ class DicTraverseSession {
|
||||||
// Non virtual inline destructor -- never inherit this class
|
// Non virtual inline destructor -- never inherit this class
|
||||||
AK_FORCE_INLINE ~DicTraverseSession() {}
|
AK_FORCE_INLINE ~DicTraverseSession() {}
|
||||||
|
|
||||||
void init(const Dictionary *dictionary, const PrevWordsInfo *const prevWordsInfo,
|
void init(const Dictionary *dictionary, const NgramContext *const ngramContext,
|
||||||
const SuggestOptions *const suggestOptions);
|
const SuggestOptions *const suggestOptions);
|
||||||
// TODO: Remove and merge into init
|
// TODO: Remove and merge into init
|
||||||
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
|
void setupForGetSuggestions(const ProximityInfo *pInfo, const int *inputCodePoints,
|
||||||
|
|
|
@ -14,8 +14,8 @@
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LATINIME_PREV_WORDS_INFO_H
|
#ifndef LATINIME_NGRAM_CONTEXT_H
|
||||||
#define LATINIME_PREV_WORDS_INFO_H
|
#define LATINIME_NGRAM_CONTEXT_H
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
|
@ -27,25 +27,25 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
// Rename to NgramContext.
|
// Rename to NgramContext.
|
||||||
class PrevWordsInfo {
|
class NgramContext {
|
||||||
public:
|
public:
|
||||||
// No prev word information.
|
// No prev word information.
|
||||||
PrevWordsInfo() : mPrevWordCount(0) {
|
NgramContext() : mPrevWordCount(0) {
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
PrevWordsInfo(const PrevWordsInfo &prevWordsInfo)
|
NgramContext(const NgramContext &ngramContext)
|
||||||
: mPrevWordCount(prevWordsInfo.mPrevWordCount) {
|
: mPrevWordCount(ngramContext.mPrevWordCount) {
|
||||||
for (size_t i = 0; i < mPrevWordCount; ++i) {
|
for (size_t i = 0; i < mPrevWordCount; ++i) {
|
||||||
mPrevWordCodePointCount[i] = prevWordsInfo.mPrevWordCodePointCount[i];
|
mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i];
|
||||||
memmove(mPrevWordCodePoints[i], prevWordsInfo.mPrevWordCodePoints[i],
|
memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i],
|
||||||
sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
|
sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]);
|
||||||
mIsBeginningOfSentence[i] = prevWordsInfo.mIsBeginningOfSentence[i];
|
mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct from previous words.
|
// Construct from previous words.
|
||||||
PrevWordsInfo(const int prevWordCodePoints[][MAX_WORD_LENGTH],
|
NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH],
|
||||||
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
|
const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence,
|
||||||
const size_t prevWordCount)
|
const size_t prevWordCount)
|
||||||
: mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
|
: mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) {
|
||||||
|
@ -62,7 +62,7 @@ class PrevWordsInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct from a previous word.
|
// Construct from a previous word.
|
||||||
PrevWordsInfo(const int *const prevWordCodePoints, const int prevWordCodePointCount,
|
NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount,
|
||||||
const bool isBeginningOfSentence) : mPrevWordCount(1) {
|
const bool isBeginningOfSentence) : mPrevWordCount(1) {
|
||||||
clear();
|
clear();
|
||||||
if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
|
if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) {
|
||||||
|
@ -79,8 +79,8 @@ class PrevWordsInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Remove.
|
// TODO: Remove.
|
||||||
const PrevWordsInfo getTrimmedPrevWordsInfo(const size_t maxPrevWordCount) const {
|
const NgramContext getTrimmedNgramContext(const size_t maxPrevWordCount) const {
|
||||||
return PrevWordsInfo(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
|
return NgramContext(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence,
|
||||||
std::min(mPrevWordCount, maxPrevWordCount));
|
std::min(mPrevWordCount, maxPrevWordCount));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,7 +123,7 @@ class PrevWordsInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_ASSIGNMENT_OPERATOR(PrevWordsInfo);
|
DISALLOW_ASSIGNMENT_OPERATOR(NgramContext);
|
||||||
|
|
||||||
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||||
const int *const wordCodePoints, const int wordCodePointCount,
|
const int *const wordCodePoints, const int wordCodePointCount,
|
||||||
|
@ -166,4 +166,4 @@ class PrevWordsInfo {
|
||||||
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
bool mIsBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_PREV_WORDS_INFO_H
|
#endif // LATINIME_NGRAM_CONTEXT_H
|
||||||
|
|
|
@ -338,7 +338,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
|
||||||
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
|
return mNodeWriter.suppressUnigramEntry(&ptNodeParams);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty) {
|
const NgramProperty *const ngramProperty) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
||||||
|
@ -349,8 +349,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!prevWordsInfo->isValid()) {
|
if (!ngramContext->isValid()) {
|
||||||
AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
|
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
||||||
|
@ -359,23 +359,23 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
|
const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
|
||||||
false /* tryLowerCaseSearch */);
|
false /* tryLowerCaseSearch */);
|
||||||
if (prevWordIds.empty()) {
|
if (prevWordIds.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (prevWordIds[0] == NOT_A_WORD_ID) {
|
if (prevWordIds[0] == NOT_A_WORD_ID) {
|
||||||
if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) {
|
if (ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)) {
|
||||||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
|
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
|
||||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
|
||||||
&beginningOfSentenceUnigramProperty)) {
|
&beginningOfSentenceUnigramProperty)) {
|
||||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Refresh word ids.
|
// Refresh word ids.
|
||||||
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -399,7 +399,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints) {
|
const CodePointArrayView wordCodePoints) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
||||||
|
@ -410,8 +410,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!prevWordsInfo->isValid()) {
|
if (!ngramContext->isValid()) {
|
||||||
AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
|
AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
||||||
|
@ -419,7 +419,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
wordCodePoints.size());
|
wordCodePoints.size());
|
||||||
}
|
}
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
|
const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
|
||||||
false /* tryLowerCaseSerch */);
|
false /* tryLowerCaseSerch */);
|
||||||
if (prevWordIds.firstOrDefault(NOT_A_WORD_ID) == NOT_A_WORD_ID) {
|
if (prevWordIds.firstOrDefault(NOT_A_WORD_ID) == NOT_A_WORD_ID) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -441,7 +441,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
|
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
||||||
const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints,
|
const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
|
||||||
const bool isValidWord, const HistoricalInfo historicalInfo) {
|
const bool isValidWord, const HistoricalInfo historicalInfo) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
|
AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
|
||||||
|
@ -455,11 +455,11 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
||||||
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
|
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int probabilityForNgram = prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||||
? NOT_A_PROBABILITY : probability;
|
? NOT_A_PROBABILITY : probability;
|
||||||
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
|
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
|
||||||
historicalInfo);
|
historicalInfo);
|
||||||
if (!addNgramEntry(prevWordsInfo, &ngramProperty)) {
|
if (!addNgramEntry(ngramContext, &ngramProperty)) {
|
||||||
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
|
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,13 +112,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty);
|
const NgramProperty *const ngramProperty);
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints);
|
const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool updateEntriesForWordWithNgramContext(const PrevWordsInfo *const prevWordsInfo,
|
bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
||||||
const HistoricalInfo historicalInfo);
|
const HistoricalInfo historicalInfo);
|
||||||
|
|
||||||
|
|
|
@ -93,21 +93,21 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty) {
|
const NgramProperty *const ngramProperty) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints) {
|
const CodePointArrayView wordCodePoints) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool updateEntriesForWordWithNgramContext(const PrevWordsInfo *const prevWordsInfo,
|
bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
||||||
const HistoricalInfo historicalInfo) {
|
const HistoricalInfo historicalInfo) {
|
||||||
// This method should not be called for non-updatable dictionary.
|
// This method should not be called for non-updatable dictionary.
|
||||||
|
|
|
@ -266,7 +266,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty) {
|
const NgramProperty *const ngramProperty) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary.");
|
||||||
|
@ -277,8 +277,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!prevWordsInfo->isValid()) {
|
if (!ngramContext->isValid()) {
|
||||||
AKLOGE("prev words info is not valid for adding n-gram entry to the dictionary.");
|
AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
if (ngramProperty->getTargetCodePoints()->size() > MAX_WORD_LENGTH) {
|
||||||
|
@ -287,7 +287,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
|
const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
|
||||||
false /* tryLowerCaseSearch */);
|
false /* tryLowerCaseSearch */);
|
||||||
if (prevWordIds.empty()) {
|
if (prevWordIds.empty()) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -296,19 +296,19 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
if (prevWordIds[i] != NOT_A_WORD_ID) {
|
if (prevWordIds[i] != NOT_A_WORD_ID) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!prevWordsInfo->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
|
if (!ngramContext->isNthPrevWordBeginningOfSentence(i + 1 /* n */)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const UnigramProperty beginningOfSentenceUnigramProperty(
|
const UnigramProperty beginningOfSentenceUnigramProperty(
|
||||||
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
true /* representsBeginningOfSentence */, true /* isNotAWord */,
|
||||||
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
|
false /* isBlacklisted */, MAX_PROBABILITY /* probability */, HistoricalInfo());
|
||||||
if (!addUnigramEntry(prevWordsInfo->getNthPrevWordCodePoints(1 /* n */),
|
if (!addUnigramEntry(ngramContext->getNthPrevWordCodePoints(1 /* n */),
|
||||||
&beginningOfSentenceUnigramProperty)) {
|
&beginningOfSentenceUnigramProperty)) {
|
||||||
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
AKLOGE("Cannot add unigram entry for the beginning-of-sentence.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
// Refresh word ids.
|
// Refresh word ids.
|
||||||
prevWordsInfo->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
ngramContext->getPrevWordIds(this, &prevWordIdArray, false /* tryLowerCaseSearch */);
|
||||||
}
|
}
|
||||||
const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()),
|
const int wordId = getWordId(CodePointArrayView(*ngramProperty->getTargetCodePoints()),
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
|
@ -326,7 +326,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool Ver4PatriciaTriePolicy::removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints) {
|
const CodePointArrayView wordCodePoints) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
AKLOGI("Warning: removeNgramEntry() is called for non-updatable dictionary.");
|
||||||
|
@ -337,8 +337,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
mDictBuffer->getTailPosition());
|
mDictBuffer->getTailPosition());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!prevWordsInfo->isValid()) {
|
if (!ngramContext->isValid()) {
|
||||||
AKLOGE("prev words info is not valid for removing n-gram entry form the dictionary.");
|
AKLOGE("Ngram context is not valid for removing n-gram entry form the dictionary.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
if (wordCodePoints.size() > MAX_WORD_LENGTH) {
|
||||||
|
@ -346,7 +346,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
wordCodePoints.size());
|
wordCodePoints.size());
|
||||||
}
|
}
|
||||||
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
WordIdArray<MAX_PREV_WORD_COUNT_FOR_N_GRAM> prevWordIdArray;
|
||||||
const WordIdArrayView prevWordIds = prevWordsInfo->getPrevWordIds(this, &prevWordIdArray,
|
const WordIdArrayView prevWordIds = ngramContext->getPrevWordIds(this, &prevWordIdArray,
|
||||||
false /* tryLowerCaseSerch */);
|
false /* tryLowerCaseSerch */);
|
||||||
if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
|
if (prevWordIds.empty() || prevWordIds.contains(NOT_A_WORD_ID)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -364,7 +364,7 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
||||||
const PrevWordsInfo *const prevWordsInfo, const CodePointArrayView wordCodePoints,
|
const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints,
|
||||||
const bool isValidWord, const HistoricalInfo historicalInfo) {
|
const bool isValidWord, const HistoricalInfo historicalInfo) {
|
||||||
if (!mBuffers->isUpdatable()) {
|
if (!mBuffers->isUpdatable()) {
|
||||||
AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
|
AKLOGI("Warning: updateEntriesForWordWithNgramContext() is called for non-updatable "
|
||||||
|
@ -379,13 +379,13 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext(
|
||||||
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
|
AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext().");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const int probabilityForNgram = prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)
|
const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */)
|
||||||
? NOT_A_PROBABILITY : probability;
|
? NOT_A_PROBABILITY : probability;
|
||||||
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
|
const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram,
|
||||||
historicalInfo);
|
historicalInfo);
|
||||||
for (size_t i = 1; i <= prevWordsInfo->getPrevWordCount(); ++i) {
|
for (size_t i = 1; i <= ngramContext->getPrevWordCount(); ++i) {
|
||||||
const PrevWordsInfo trimmedPrevWordsInfo(prevWordsInfo->getTrimmedPrevWordsInfo(i));
|
const NgramContext trimmedNgramContext(ngramContext->getTrimmedNgramContext(i));
|
||||||
if (!addNgramEntry(&trimmedPrevWordsInfo, &ngramProperty)) {
|
if (!addNgramEntry(&trimmedNgramContext, &ngramProperty)) {
|
||||||
AKLOGE("Cannot update ngram entry in updateEntriesForWordWithNgramContext().");
|
AKLOGE("Cannot update ngram entry in updateEntriesForWordWithNgramContext().");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,13 +92,13 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
bool removeUnigramEntry(const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool addNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool addNgramEntry(const NgramContext *const ngramContext,
|
||||||
const NgramProperty *const ngramProperty);
|
const NgramProperty *const ngramProperty);
|
||||||
|
|
||||||
bool removeNgramEntry(const PrevWordsInfo *const prevWordsInfo,
|
bool removeNgramEntry(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints);
|
const CodePointArrayView wordCodePoints);
|
||||||
|
|
||||||
bool updateEntriesForWordWithNgramContext(const PrevWordsInfo *const prevWordsInfo,
|
bool updateEntriesForWordWithNgramContext(const NgramContext *const ngramContext,
|
||||||
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
const CodePointArrayView wordCodePoints, const bool isValidWord,
|
||||||
const HistoricalInfo historicalInfo);
|
const HistoricalInfo historicalInfo);
|
||||||
|
|
||||||
|
|
|
@ -96,7 +96,7 @@ class JniDataUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static PrevWordsInfo constructPrevWordsInfo(JNIEnv *env, jobjectArray prevWordCodePointArrays,
|
static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays,
|
||||||
jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
|
jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) {
|
||||||
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
|
int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH];
|
||||||
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
|
@ -119,7 +119,7 @@ class JniDataUtils {
|
||||||
&isBeginningOfSentenceBoolean);
|
&isBeginningOfSentenceBoolean);
|
||||||
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
|
isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE;
|
||||||
}
|
}
|
||||||
return PrevWordsInfo(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
|
return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence,
|
||||||
prevWordCount);
|
prevWordCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue