From f87bb77a9183d126847d5925c2b03bec45fabd6d Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 21 Oct 2014 17:04:56 +0900 Subject: [PATCH 1/2] Create .cpp file for NgramContext. Bug: 14425059 Change-Id: Ie950878817b9c80cc9c970e1a84880c9b9ab228a --- native/jni/NativeFileList.mk | 3 +- .../suggest/core/session/ngram_context.cpp | 123 ++++++++++++++++++ .../src/suggest/core/session/ngram_context.h | 121 +++-------------- 3 files changed, 140 insertions(+), 107 deletions(-) create mode 100644 native/jni/src/suggest/core/session/ngram_context.cpp diff --git a/native/jni/NativeFileList.mk b/native/jni/NativeFileList.mk index b896f386f..7299ed3c0 100644 --- a/native/jni/NativeFileList.mk +++ b/native/jni/NativeFileList.mk @@ -40,6 +40,7 @@ LATIN_IME_CORE_SRC_FILES := \ proximity_info_state_utils.cpp) \ suggest/core/policy/weighting.cpp \ suggest/core/session/dic_traverse_session.cpp \ + suggest/core/session/ngram_context.cpp \ $(addprefix suggest/core/result/, \ suggestion_results.cpp \ suggestions_output_utils.cpp) \ @@ -55,7 +56,7 @@ LATIN_IME_CORE_SRC_FILES := \ dynamic_pt_updating_helper.cpp \ dynamic_pt_writing_utils.cpp \ patricia_trie_reading_utils.cpp \ - shortcut/shortcut_list_reading_utils.cpp ) \ + shortcut/shortcut_list_reading_utils.cpp) \ $(addprefix suggest/policyimpl/dictionary/structure/v2/, \ patricia_trie_policy.cpp \ ver2_patricia_trie_node_reader.cpp \ diff --git a/native/jni/src/suggest/core/session/ngram_context.cpp b/native/jni/src/suggest/core/session/ngram_context.cpp new file mode 100644 index 000000000..17ef9ae60 --- /dev/null +++ b/native/jni/src/suggest/core/session/ngram_context.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "suggest/core/session/ngram_context.h" + +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "utils/char_utils.h" + +namespace latinime { + +NgramContext::NgramContext() : mPrevWordCount(0) { clear(); /* isValid() reads slot 0 */ } + +NgramContext::NgramContext(const NgramContext &ngramContext) + : mPrevWordCount(ngramContext.mPrevWordCount) { + for (size_t i = 0; i < mPrevWordCount; ++i) { + mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i]; + memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i], + sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]); + mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i]; + } +} + +NgramContext::NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH], + const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, + const size_t prevWordCount) + : mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) { + clear(); + for (size_t i = 0; i < mPrevWordCount; ++i) { + if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) { + continue; + } + memmove(mPrevWordCodePoints[i], prevWordCodePoints[i], + sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]); + mPrevWordCodePointCount[i] = prevWordCodePointCount[i]; + mIsBeginningOfSentence[i] = isBeginningOfSentence[i]; + } +} + +NgramContext::NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount, + const bool isBeginningOfSentence) : mPrevWordCount(1) { + clear(); + if (prevWordCodePointCount > 
MAX_WORD_LENGTH || !prevWordCodePoints) { + return; + } + memmove(mPrevWordCodePoints[0], prevWordCodePoints, + sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount); + mPrevWordCodePointCount[0] = prevWordCodePointCount; + mIsBeginningOfSentence[0] = isBeginningOfSentence; +} + +bool NgramContext::isValid() const { + if (mPrevWordCodePointCount[0] > 0) { + return true; + } + if (mIsBeginningOfSentence[0]) { + return true; + } + return false; +} + +const CodePointArrayView NgramContext::getNthPrevWordCodePoints(const size_t n) const { + if (n <= 0 || n > mPrevWordCount) { + return CodePointArrayView(); + } + return CodePointArrayView(mPrevWordCodePoints[n - 1], mPrevWordCodePointCount[n - 1]); +} + +bool NgramContext::isNthPrevWordBeginningOfSentence(const size_t n) const { + if (n <= 0 || n > mPrevWordCount) { + return false; + } + return mIsBeginningOfSentence[n - 1]; +} + +/* static */ int NgramContext::getWordId( + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, + const int *const wordCodePoints, const int wordCodePointCount, + const bool isBeginningOfSentence, const bool tryLowerCaseSearch) { + if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { + return NOT_A_WORD_ID; + } + int codePoints[MAX_WORD_LENGTH]; + int codePointCount = wordCodePointCount; + memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount); + if (isBeginningOfSentence) { + codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, codePointCount, + MAX_WORD_LENGTH); + if (codePointCount <= 0) { + return NOT_A_WORD_ID; + } + } + const CodePointArrayView codePointArrayView(codePoints, codePointCount); + const int wordId = dictStructurePolicy->getWordId(codePointArrayView, + false /* forceLowerCaseSearch */); + if (wordId != NOT_A_WORD_ID || !tryLowerCaseSearch) { + // Return the id when the word was found or when lower case search is not requested. 
+ return wordId; + } + // Check bigrams for lower-cased previous word if original was not found. Useful for + // auto-capitalized words like "The [current_word]". + return dictStructurePolicy->getWordId(codePointArrayView, true /* forceLowerCaseSearch */); +} + +void NgramContext::clear() { + for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { + mPrevWordCodePointCount[i] = 0; + mIsBeginningOfSentence[i] = false; + } +} +} // namespace latinime diff --git a/native/jni/src/suggest/core/session/ngram_context.h b/native/jni/src/suggest/core/session/ngram_context.h index 64c71410f..9b36199c9 100644 --- a/native/jni/src/suggest/core/session/ngram_context.h +++ b/native/jni/src/suggest/core/session/ngram_context.h @@ -20,145 +20,54 @@ #include #include "defines.h" -#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" -#include "utils/char_utils.h" #include "utils/int_array_view.h" namespace latinime { -// Rename to NgramContext. +class DictionaryStructureWithBufferPolicy; + class NgramContext { public: // No prev word information. - NgramContext() : mPrevWordCount(0) { - clear(); - } - - NgramContext(const NgramContext &ngramContext) - : mPrevWordCount(ngramContext.mPrevWordCount) { - for (size_t i = 0; i < mPrevWordCount; ++i) { - mPrevWordCodePointCount[i] = ngramContext.mPrevWordCodePointCount[i]; - memmove(mPrevWordCodePoints[i], ngramContext.mPrevWordCodePoints[i], - sizeof(mPrevWordCodePoints[i][0]) * mPrevWordCodePointCount[i]); - mIsBeginningOfSentence[i] = ngramContext.mIsBeginningOfSentence[i]; - } - } - + NgramContext(); + // Copy constructor to use this class with std::vector and use this class as a return value. + NgramContext(const NgramContext &ngramContext); // Construct from previous words. 
NgramContext(const int prevWordCodePoints[][MAX_WORD_LENGTH], const int *const prevWordCodePointCount, const bool *const isBeginningOfSentence, - const size_t prevWordCount) - : mPrevWordCount(std::min(NELEMS(mPrevWordCodePoints), prevWordCount)) { - clear(); - for (size_t i = 0; i < mPrevWordCount; ++i) { - if (prevWordCodePointCount[i] < 0 || prevWordCodePointCount[i] > MAX_WORD_LENGTH) { - continue; - } - memmove(mPrevWordCodePoints[i], prevWordCodePoints[i], - sizeof(mPrevWordCodePoints[i][0]) * prevWordCodePointCount[i]); - mPrevWordCodePointCount[i] = prevWordCodePointCount[i]; - mIsBeginningOfSentence[i] = isBeginningOfSentence[i]; - } - } - + const size_t prevWordCount); // Construct from a previous word. NgramContext(const int *const prevWordCodePoints, const int prevWordCodePointCount, - const bool isBeginningOfSentence) : mPrevWordCount(1) { - clear(); - if (prevWordCodePointCount > MAX_WORD_LENGTH || !prevWordCodePoints) { - return; - } - memmove(mPrevWordCodePoints[0], prevWordCodePoints, - sizeof(mPrevWordCodePoints[0][0]) * prevWordCodePointCount); - mPrevWordCodePointCount[0] = prevWordCodePointCount; - mIsBeginningOfSentence[0] = isBeginningOfSentence; - } + const bool isBeginningOfSentence); size_t getPrevWordCount() const { return mPrevWordCount; } - - // TODO: Remove. 
- const NgramContext getTrimmedNgramContext(const size_t maxPrevWordCount) const { - return NgramContext(mPrevWordCodePoints, mPrevWordCodePointCount, mIsBeginningOfSentence, - std::min(mPrevWordCount, maxPrevWordCount)); - } - - bool isValid() const { - if (mPrevWordCodePointCount[0] > 0) { - return true; - } - if (mIsBeginningOfSentence[0]) { - return true; - } - return false; - } + bool isValid() const; template const WordIdArrayView getPrevWordIds( const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, - std::array *const prevWordIdBuffer, const bool tryLowerCaseSearch) const { + WordIdArray *const prevWordIdBuffer, const bool tryLowerCaseSearch) const { for (size_t i = 0; i < std::min(mPrevWordCount, N); ++i) { - prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, - mPrevWordCodePoints[i], mPrevWordCodePointCount[i], - mIsBeginningOfSentence[i], tryLowerCaseSearch); + prevWordIdBuffer->at(i) = getWordId(dictStructurePolicy, mPrevWordCodePoints[i], + mPrevWordCodePointCount[i], mIsBeginningOfSentence[i], tryLowerCaseSearch); } return WordIdArrayView::fromArray(*prevWordIdBuffer).limit(mPrevWordCount); } // n is 1-indexed. - const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const { - if (n <= 0 || n > mPrevWordCount) { - return CodePointArrayView(); - } - return CodePointArrayView(mPrevWordCodePoints[n - 1], mPrevWordCodePointCount[n - 1]); - } - + const CodePointArrayView getNthPrevWordCodePoints(const size_t n) const; // n is 1-indexed. 
- bool isNthPrevWordBeginningOfSentence(const size_t n) const { - if (n <= 0 || n > mPrevWordCount) { - return false; - } - return mIsBeginningOfSentence[n - 1]; - } + bool isNthPrevWordBeginningOfSentence(const size_t n) const; private: DISALLOW_ASSIGNMENT_OPERATOR(NgramContext); static int getWordId(const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const int *const wordCodePoints, const int wordCodePointCount, - const bool isBeginningOfSentence, const bool tryLowerCaseSearch) { - if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { - return NOT_A_WORD_ID; - } - int codePoints[MAX_WORD_LENGTH]; - int codePointCount = wordCodePointCount; - memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount); - if (isBeginningOfSentence) { - codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, - codePointCount, MAX_WORD_LENGTH); - if (codePointCount <= 0) { - return NOT_A_WORD_ID; - } - } - const CodePointArrayView codePointArrayView(codePoints, codePointCount); - const int wordId = dictStructurePolicy->getWordId( - codePointArrayView, false /* forceLowerCaseSearch */); - if (wordId != NOT_A_WORD_ID || !tryLowerCaseSearch) { - // Return the id when when the word was found or doesn't try lower case search. - return wordId; - } - // Check bigrams for lower-cased previous word if original was not found. Useful for - // auto-capitalized words like "The [current_word]". 
- return dictStructurePolicy->getWordId(codePointArrayView, true /* forceLowerCaseSearch */); - } - - void clear() { - for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { - mPrevWordCodePointCount[i] = 0; - mIsBeginningOfSentence[i] = false; - } - } + const bool isBeginningOfSentence, const bool tryLowerCaseSearch); + void clear(); const size_t mPrevWordCount; int mPrevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; From 88bb28c132d87f15a52e9a0b8a45950f39eb19ad Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Tue, 21 Oct 2014 17:12:32 +0900 Subject: [PATCH 2/2] Make NgramProperty have NgramContext. Bug: 14425059 Change-Id: I210acb816b122857dbbe1ee4dd6a35c5335bf2bf --- ...oid_inputmethod_latin_BinaryDictionary.cpp | 18 +++++++--------- .../suggest/core/dictionary/dictionary.cpp | 5 ++--- .../src/suggest/core/dictionary/dictionary.h | 3 +-- .../core/dictionary/property/ngram_property.h | 14 +++++++++---- .../core/dictionary/property/word_property.h | 4 ++-- .../dictionary_structure_with_buffer_policy.h | 5 +---- .../v402/ver4_patricia_trie_policy.cpp | 10 +++++---- .../backward/v402/ver4_patricia_trie_policy.h | 3 +-- .../structure/v2/patricia_trie_policy.cpp | 2 ++ .../structure/v2/patricia_trie_policy.h | 3 +-- .../v4/ver4_patricia_trie_policy.cpp | 21 ++++++++++++------- .../structure/v4/ver4_patricia_trie_policy.h | 3 +-- 12 files changed, 48 insertions(+), 43 deletions(-) diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp index f8dadb488..1020f6865 100644 --- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp +++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp @@ -409,9 +409,10 @@ static bool latinime_BinaryDictionary_addNgramEntry(JNIEnv *env, jclass clazz, j int wordCodePoints[wordLength]; env->GetIntArrayRegion(word, 0, wordLength, wordCodePoints); // Use 1 for count to indicate the ngram has inputted. 
- const NgramProperty ngramProperty(CodePointArrayView(wordCodePoints, wordLength).toVector(), + const NgramProperty ngramProperty(ngramContext, + CodePointArrayView(wordCodePoints, wordLength).toVector(), probability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */)); - return dictionary->addNgramEntry(&ngramContext, &ngramProperty); + return dictionary->addNgramEntry(&ngramProperty); } static bool latinime_BinaryDictionary_removeNgramEntry(JNIEnv *env, jclass clazz, jlong dict, @@ -526,12 +527,12 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j if (word0) { jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); // Use 1 for count to indicate the bigram has inputted. - const NgramProperty ngramProperty( - CodePointArrayView(word1CodePoints, word1Length).toVector(), - bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */)); const NgramContext ngramContext(word0CodePoints, word0Length, false /* isBeginningOfSentence */); - dictionary->addNgramEntry(&ngramContext, &ngramProperty); + const NgramProperty ngramProperty(ngramContext, + CodePointArrayView(word1CodePoints, word1Length).toVector(), + bigramProbability, HistoricalInfo(timestamp, 0 /* level */, 1 /* count */)); + dictionary->addNgramEntry(&ngramProperty); } if (dictionary->needsToRunGC(true /* mindsBlockByGC */)) { return i + 1; @@ -641,11 +642,8 @@ static bool latinime_BinaryDictionary_migrateNative(JNIEnv *env, jclass clazz, j return false; } } - const NgramContext ngramContext(wordCodePoints, wordCodePointCount, - wordProperty.getUnigramProperty()->representsBeginningOfSentence()); for (const NgramProperty &ngramProperty : *wordProperty.getNgramProperties()) { - if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramContext, - &ngramProperty)) { + if (!dictionaryStructureWithBufferPolicy->addNgramEntry(&ngramProperty)) { LogUtils::logToJava(env, "Cannot add ngram to the new dict."); return false; } diff 
--git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 697e99ffb..bfe17cc4c 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -140,10 +140,9 @@ bool Dictionary::removeUnigramEntry(const CodePointArrayView codePoints) { return mDictionaryStructureWithBufferPolicy->removeUnigramEntry(codePoints); } -bool Dictionary::addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty) { +bool Dictionary::addNgramEntry(const NgramProperty *const ngramProperty) { TimeKeeper::setCurrentTime(); - return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramContext, ngramProperty); + return mDictionaryStructureWithBufferPolicy->addNgramEntry(ngramProperty); } bool Dictionary::removeNgramEntry(const NgramContext *const ngramContext, diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h index 843aec473..a5e986d15 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.h +++ b/native/jni/src/suggest/core/dictionary/dictionary.h @@ -85,8 +85,7 @@ class Dictionary { bool removeUnigramEntry(const CodePointArrayView codePoints); - bool addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty); + bool addNgramEntry(const NgramProperty *const ngramProperty); bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView codePoints); diff --git a/native/jni/src/suggest/core/dictionary/property/ngram_property.h b/native/jni/src/suggest/core/dictionary/property/ngram_property.h index 8709799f9..e67b4da31 100644 --- a/native/jni/src/suggest/core/dictionary/property/ngram_property.h +++ b/native/jni/src/suggest/core/dictionary/property/ngram_property.h @@ -21,15 +21,20 @@ #include "defines.h" #include "suggest/core/dictionary/property/historical_info.h" +#include 
"suggest/core/session/ngram_context.h" namespace latinime { class NgramProperty { public: - NgramProperty(const std::vector &&targetCodePoints, const int probability, - const HistoricalInfo historicalInfo) - : mTargetCodePoints(std::move(targetCodePoints)), mProbability(probability), - mHistoricalInfo(historicalInfo) {} + NgramProperty(const NgramContext &ngramContext, const std::vector &&targetCodePoints, + const int probability, const HistoricalInfo historicalInfo) + : mNgramContext(ngramContext), mTargetCodePoints(std::move(targetCodePoints)), + mProbability(probability), mHistoricalInfo(historicalInfo) {} + + const NgramContext *getNgramContext() const { + return &mNgramContext; + } const std::vector *getTargetCodePoints() const { return &mTargetCodePoints; @@ -48,6 +53,7 @@ class NgramProperty { DISALLOW_DEFAULT_CONSTRUCTOR(NgramProperty); DISALLOW_ASSIGNMENT_OPERATOR(NgramProperty); + const NgramContext mNgramContext; const std::vector mTargetCodePoints; const int mProbability; const HistoricalInfo mHistoricalInfo; diff --git a/native/jni/src/suggest/core/dictionary/property/word_property.h b/native/jni/src/suggest/core/dictionary/property/word_property.h index 0c23e8225..01b8987b5 100644 --- a/native/jni/src/suggest/core/dictionary/property/word_property.h +++ b/native/jni/src/suggest/core/dictionary/property/word_property.h @@ -34,9 +34,9 @@ class WordProperty { : mCodePoints(), mUnigramProperty(), mNgrams() {} WordProperty(const std::vector &&codePoints, const UnigramProperty *const unigramProperty, - const std::vector *const bigrams) + const std::vector *const ngrams) : mCodePoints(std::move(codePoints)), mUnigramProperty(*unigramProperty), - mNgrams(*bigrams) {} + mNgrams(*ngrams) {} void outputProperties(JNIEnv *const env, jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, jobject outBigramTargets, jobject outBigramProbabilities, diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h 
b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index ceda5c03f..33a0fbc19 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -40,7 +40,6 @@ class UnigramProperty; * This class abstracts the structure of dictionaries. * Implement this policy to support additional dictionaries. */ -// TODO: Use word id instead of terminal PtNode position. class DictionaryStructureWithBufferPolicy { public: typedef std::unique_ptr StructurePolicyPtr; @@ -81,8 +80,7 @@ class DictionaryStructureWithBufferPolicy { virtual bool removeUnigramEntry(const CodePointArrayView wordCodePoints) = 0; // Returns whether the update was success or not. - virtual bool addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty) = 0; + virtual bool addNgramEntry(const NgramProperty *const ngramProperty) = 0; // Returns whether the update was success or not. virtual bool removeNgramEntry(const NgramContext *const ngramContext, @@ -106,7 +104,6 @@ class DictionaryStructureWithBufferPolicy { virtual void getProperty(const char *const query, const int queryLength, char *const outResult, const int maxResultLength) = 0; - // Used for testing. virtual const WordProperty getWordProperty(const CodePointArrayView wordCodePoints) const = 0; // Method to iterate all words in the dictionary. 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index d0dccc3be..66f750120 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -344,8 +344,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod return mNodeWriter.suppressUnigramEntry(&ptNodeParams); } -bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty) { +bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -355,6 +354,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex mDictBuffer->getTailPosition()); return false; } + const NgramContext *const ngramContext = ngramProperty->getNgramContext(); if (!ngramContext->isValid()) { AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary."); return false; @@ -463,9 +463,9 @@ bool Ver4PatriciaTriePolicy::updateEntriesForWordWithNgramContext( } const int probabilityForNgram = ngramContext->isNthPrevWordBeginningOfSentence(1 /* n */) ? 
NOT_A_PROBABILITY : probability; - const NgramProperty ngramProperty(wordCodePoints.toVector(), probabilityForNgram, + const NgramProperty ngramProperty(*ngramContext, wordCodePoints.toVector(), probabilityForNgram, historicalInfo); - if (!addNgramEntry(ngramContext, &ngramProperty)) { + if (!addNgramEntry(&ngramProperty)) { AKLOGE("Cannot update unigarm entry in updateEntriesForWordWithNgramContext()."); return false; } @@ -585,6 +585,8 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( bigramEntry.getHistoricalInfo(), mHeaderPolicy) : bigramEntry.getProbability(); ngrams.emplace_back( + NgramContext(wordCodePoints.data(), wordCodePoints.size(), + ptNodeParams.representsBeginningOfSentence()), CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), probability, *historicalInfo); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 2cda0d3fa..0480876ed 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -113,8 +113,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); - bool addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty); + bool addNgramEntry(const NgramProperty *const ngramProperty); bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints); diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index b7f1199c5..b0d5c901e 100644 --- 
a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -451,6 +451,8 @@ const WordProperty PatriciaTriePolicy::getWordProperty( bigramWord1CodePoints, &word1Probability); const int probability = getProbability(word1Probability, bigramsIt.getProbability()); ngrams.emplace_back( + NgramContext(wordCodePoints.data(), wordCodePoints.size(), + ptNodeParams.representsBeginningOfSentence()), CodePointArrayView(bigramWord1CodePoints, word1CodePointCount).toVector(), probability, HistoricalInfo()); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index b17681388..8933962ab 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -93,8 +93,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { return false; } - bool addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty) { + bool addNgramEntry(const NgramProperty *const ngramProperty) { // This method should not be called for non-updatable dictionary. 
AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index ead1bde50..094106746 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -264,8 +264,7 @@ bool Ver4PatriciaTriePolicy::removeUnigramEntry(const CodePointArrayView wordCod return true; } -bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty) { +bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramProperty *const ngramProperty) { if (!mBuffers->isUpdatable()) { AKLOGI("Warning: addNgramEntry() is called for non-updatable dictionary."); return false; @@ -275,6 +274,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const NgramContext *const ngramContex mDictBuffer->getTailPosition()); return false; } + const NgramContext *const ngramContext = ngramProperty->getNgramContext(); if (!ngramContext->isValid()) { AKLOGE("Ngram context is not valid for adding n-gram entry to the dictionary."); return false; @@ -451,7 +451,8 @@ bool Ver4PatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const { // Needs to reduce dictionary size. 
return true; } else if (mHeaderPolicy->isDecayingDict()) { - return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), mHeaderPolicy); + return ForgettingCurveUtils::needsToDecay(mindsBlockByGC, mEntryCounters.getEntryCounts(), + mHeaderPolicy); } return false; } @@ -501,12 +502,16 @@ const WordProperty Ver4PatriciaTriePolicy::getWordProperty( prevWordIds)) { const int codePointCount = getCodePointsAndReturnCodePointCount(entry.getWordId(), MAX_WORD_LENGTH, bigramWord1CodePoints); - const ProbabilityEntry probabilityEntry = entry.getProbabilityEntry(); - const HistoricalInfo *const historicalInfo = probabilityEntry.getHistoricalInfo(); - const int probability = probabilityEntry.hasHistoricalInfo() ? + const ProbabilityEntry ngramProbabilityEntry = entry.getProbabilityEntry(); + const HistoricalInfo *const historicalInfo = ngramProbabilityEntry.getHistoricalInfo(); + const int probability = ngramProbabilityEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability(historicalInfo, mHeaderPolicy) : - probabilityEntry.getProbability(); - ngrams.emplace_back(CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), + ngramProbabilityEntry.getProbability(); + ngrams.emplace_back( + NgramContext( + wordCodePoints.data(), wordCodePoints.size(), + probabilityEntry.representsBeginningOfSentence()), + CodePointArrayView(bigramWord1CodePoints, codePointCount).toVector(), probability, *historicalInfo); } // Fetch shortcut information. 
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index e3611cb32..13700b390 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -92,8 +92,7 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { bool removeUnigramEntry(const CodePointArrayView wordCodePoints); - bool addNgramEntry(const NgramContext *const ngramContext, - const NgramProperty *const ngramProperty); + bool addNgramEntry(const NgramProperty *const ngramProperty); bool removeNgramEntry(const NgramContext *const ngramContext, const CodePointArrayView wordCodePoints);