From 537f6eea8a8d56fe532913a37f4dbff4b3d490af Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Thu, 11 Sep 2014 19:36:22 +0900 Subject: [PATCH] Use WordIdArrayView for prevWordIds. Bug: 14425059 Change-Id: Ia84fb997d89564e60111b46ca83bbfa3b187f316 --- .../jni/src/suggest/core/dicnode/dic_node.h | 14 ++-- .../suggest/core/dicnode/dic_node_utils.cpp | 2 +- .../src/suggest/core/dicnode/dic_node_utils.h | 3 +- .../dicnode/internal/dic_node_properties.h | 22 +++--- .../suggest/core/dictionary/dictionary.cpp | 22 +++--- .../core/dictionary/dictionary_utils.cpp | 8 +- .../core/dictionary/multi_bigram_map.cpp | 15 ++-- .../core/dictionary/multi_bigram_map.h | 9 ++- .../dictionary_structure_with_buffer_policy.h | 6 +- .../core/session/dic_traverse_session.cpp | 2 +- .../core/session/dic_traverse_session.h | 9 +-- .../v402/ver4_patricia_trie_policy.cpp | 12 +-- .../backward/v402/ver4_patricia_trie_policy.h | 9 ++- .../structure/v2/patricia_trie_policy.cpp | 19 ++--- .../structure/v2/patricia_trie_policy.h | 9 ++- .../v4/ver4_patricia_trie_policy.cpp | 78 +++++++------------ .../structure/v4/ver4_patricia_trie_policy.h | 15 ++-- native/jni/src/utils/int_array_view.h | 8 +- .../language_model_dict_content_test.cpp | 5 +- .../jni/tests/utils/int_array_view_test.cpp | 4 +- 20 files changed, 133 insertions(+), 138 deletions(-) diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h index 3970963e8..ec61783cb 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node.h +++ b/native/jni/src/suggest/core/dicnode/dic_node.h @@ -105,7 +105,7 @@ class DicNode { } // Init for root with prevWordIds which is used for n-gram - void initAsRoot(const int rootPtNodeArrayPos, const int *const prevWordIds) { + void initAsRoot(const int rootPtNodeArrayPos, const WordIdArrayView prevWordIds) { mIsCachedForNextSuggestion = false; mDicNodeProperties.init(rootPtNodeArrayPos, prevWordIds); mDicNodeState.init(); @@ -115,12 +115,11 @@ class DicNode { // Init for root with previous word void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; - int newPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + WordIdArray newPrevWordIds; newPrevWordIds[0] = dicNode->mDicNodeProperties.getWordId(); - for (size_t i = 1; i < NELEMS(newPrevWordIds); ++i) { - newPrevWordIds[i] = dicNode->getPrevWordIds()[i - 1]; - } - mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordIds); + dicNode->getPrevWordIds().limit(newPrevWordIds.size() - 1) + .copyToArray(&newPrevWordIds, 1 /* offset */); + mDicNodeProperties.init(rootPtNodeArrayPos, WordIdArrayView::fromArray(newPrevWordIds)); mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, dicNode->mDicNodeProperties.getDepth()); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); @@ -203,8 +202,7 @@ class DicNode { return mDicNodeProperties.getWordId(); } - // TODO: Use view class to return word id array. - const int *getPrevWordIds() const { + const WordIdArrayView getPrevWordIds() const { return mDicNodeProperties.getPrevWordIds(); } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp index fe5fe8448..7d2898b7a 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.cpp @@ -28,7 +28,7 @@ namespace latinime { /* static */ void DicNodeUtils::initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int *const prevWordIds, DicNode *const newRootDicNode) { + const WordIdArrayView prevWordIds, DicNode *const newRootDicNode) { newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordIds); } diff --git a/native/jni/src/suggest/core/dicnode/dic_node_utils.h b/native/jni/src/suggest/core/dicnode/dic_node_utils.h index 961a1c29d..b891a842a 100644 --- a/native/jni/src/suggest/core/dicnode/dic_node_utils.h +++ b/native/jni/src/suggest/core/dicnode/dic_node_utils.h @@ -18,6 +18,7 @@ #define LATINIME_DIC_NODE_UTILS_H #include "defines.h" +#include "utils/int_array_view.h" namespace latinime { @@ -30,7 +31,7 @@ class DicNodeUtils { public: static void initAsRoot( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, - const int *const prevWordIds, DicNode *const newRootDicNode); + const WordIdArrayView prevWordIds, DicNode *const newRootDicNode); static void initAsRootWithPreviousWord( const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h index 6a1b84273..cecfc7aa9 100644 --- a/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h +++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_properties.h @@ -18,8 +18,10 @@ #define LATINIME_DIC_NODE_PROPERTIES_H #include +#include #include "defines.h" +#include "utils/int_array_view.h" namespace latinime { @@ -36,23 +38,23 @@ class DicNodeProperties { // Should be called only once per DicNode is initialized. void init(const int childrenPos, const int nodeCodePoint, const int wordId, - const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordIds) { + const uint16_t depth, const uint16_t leavingDepth, const WordIdArrayView prevWordIds) { mChildrenPtNodeArrayPos = childrenPos; mDicNodeCodePoint = nodeCodePoint; mWordId = wordId; mDepth = depth; mLeavingDepth = leavingDepth; - memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); + prevWordIds.copyToArray(&mPrevWordIds, 0 /* offset */); } // Init for root with prevWordsPtNodePos which is used for n-gram - void init(const int rootPtNodeArrayPos, const int *const prevWordIds) { + void init(const int rootPtNodeArrayPos, const WordIdArrayView prevWordIds) { mChildrenPtNodeArrayPos = rootPtNodeArrayPos; mDicNodeCodePoint = NOT_A_CODE_POINT; mWordId = NOT_A_WORD_ID; mDepth = 0; mLeavingDepth = 0; - memmove(mPrevWordIds, prevWordIds, sizeof(mPrevWordIds)); + prevWordIds.copyToArray(&mPrevWordIds, 0 /* offset */); } void initByCopy(const DicNodeProperties *const dicNodeProp) { @@ -61,7 +63,8 @@ class DicNodeProperties { mWordId = dicNodeProp->mWordId; mDepth = dicNodeProp->mDepth; mLeavingDepth = dicNodeProp->mLeavingDepth; - memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); + WordIdArrayView::fromArray(dicNodeProp->mPrevWordIds) + .copyToArray(&mPrevWordIds, 0 /* offset */); } // Init as passing child @@ -71,7 +74,8 @@ class DicNodeProperties { mWordId = dicNodeProp->mWordId; mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mLeavingDepth = dicNodeProp->mLeavingDepth; - memmove(mPrevWordIds, dicNodeProp->mPrevWordIds, sizeof(mPrevWordIds)); + WordIdArrayView::fromArray(dicNodeProp->mPrevWordIds) + .copyToArray(&mPrevWordIds, 0 /* offset */); } int getChildrenPtNodeArrayPos() const { @@ -99,8 +103,8 @@ class DicNodeProperties { return (mChildrenPtNodeArrayPos != NOT_A_DICT_POS) || mDepth != mLeavingDepth; } - const int *getPrevWordIds() const { - return mPrevWordIds; + const WordIdArrayView getPrevWordIds() const { + return WordIdArrayView::fromArray(mPrevWordIds); } int getWordId() const { @@ -116,7 +120,7 @@ class DicNodeProperties { int mWordId; uint16_t mDepth; uint16_t mLeavingDepth; - int mPrevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + WordIdArray mPrevWordIds; }; } // namespace latinime #endif // LATINIME_DIC_NODE_PROPERTIES_H diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp index 1de405104..ec261cfbf 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp @@ -85,7 +85,7 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi return; } const WordAttributes wordAttributes = mDictStructurePolicy->getWordAttributesInContext( - mPrevWordIds.data(), targetWordId, nullptr /* multiBigramMap */); + mPrevWordIds, targetWordId, nullptr /* multiBigramMap */); mSuggestionResults->addPrediction(targetWordCodePoints, codePointCount, wordAttributes.getProbability()); } @@ -93,13 +93,13 @@ void Dictionary::NgramListenerForPrediction::onVisitEntry(const int ngramProbabi void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { TimeKeeper::setCurrentTime(); - int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds, + WordIdArray prevWordIds; + prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds.data(), true /* tryLowerCaseSearch */); - NgramListenerForPrediction listener(prevWordsInfo, - WordIdArrayView::fromFixedSizeArray(prevWordIds), outSuggestionResults, + const WordIdArrayView prevWordIdArrayView = WordIdArrayView::fromArray(prevWordIds); + NgramListenerForPrediction listener(prevWordsInfo, prevWordIdArrayView, outSuggestionResults, mDictionaryStructureWithBufferPolicy.get()); - mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIds, &listener); + mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordIdArrayView, &listener); } int Dictionary::getProbability(const int *word, int length) const { @@ -119,13 +119,13 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co CodePointArrayView(word, length), false /* forceLowerCaseSearch */); if (wordId == NOT_A_WORD_ID) return NOT_A_PROBABILITY; if (!prevWordsInfo) { - return getDictionaryStructurePolicy()->getProbabilityOfWord( - nullptr /* prevWordsPtNodePos */, wordId); + return getDictionaryStructurePolicy()->getProbabilityOfWord(WordIdArrayView(), wordId); } - int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds, + WordIdArray prevWordIds; + prevWordsInfo->getPrevWordIds(mDictionaryStructureWithBufferPolicy.get(), prevWordIds.data(), true /* tryLowerCaseSearch */); - return getDictionaryStructurePolicy()->getProbabilityOfWord(prevWordIds, wordId); + return getDictionaryStructurePolicy()->getProbabilityOfWord( + IntArrayView::fromArray(prevWordIds), wordId); } bool Dictionary::addUnigramEntry(const int *const word, const int length, diff --git a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp index f71d4c5f0..617bf5a90 100644 --- a/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp +++ b/native/jni/src/suggest/core/dictionary/dictionary_utils.cpp @@ -23,6 +23,7 @@ #include "suggest/core/dictionary/digraph_utils.h" #include "suggest/core/session/prev_words_info.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "utils/int_array_view.h" namespace latinime { @@ -34,11 +35,12 @@ namespace latinime { // No prev words information. PrevWordsInfo emptyPrevWordsInfo; - int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - emptyPrevWordsInfo.getPrevWordIds(dictionaryStructurePolicy, prevWordIds, + WordIdArray prevWordIds; + emptyPrevWordsInfo.getPrevWordIds(dictionaryStructurePolicy, prevWordIds.data(), false /* tryLowerCaseSearch */); current.emplace_back(); - DicNodeUtils::initAsRoot(dictionaryStructurePolicy, prevWordIds, ¤t.front()); + DicNodeUtils::initAsRoot(dictionaryStructurePolicy, + IntArrayView::fromArray(prevWordIds), ¤t.front()); for (int i = 0; i < codePointCount; ++i) { // The base-lower input is used to ignore case errors and accent errors. const int codePoint = CharUtils::toBaseLowerCase(codePoints[i]); diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp index 979d61edb..761f51ec8 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.cpp @@ -35,9 +35,9 @@ const int MultiBigramMap::BigramMap::DEFAULT_HASH_MAP_SIZE_FOR_EACH_BIGRAM_MAP = // Also caches the bigrams if there is space remaining and they have not been cached already. int MultiBigramMap::getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds, const int nextWordId, + const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability) { - if (!prevWordIds || prevWordIds[0] == NOT_A_WORD_ID) { + if (prevWordIds.empty() || prevWordIds[0] == NOT_A_WORD_ID) { return structurePolicy->getProbability(unigramProbability, NOT_A_PROBABILITY); } const auto mapPosition = mBigramMaps.find(prevWordIds[0]); @@ -56,7 +56,7 @@ int MultiBigramMap::getBigramProbability( void MultiBigramMap::BigramMap::init( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds) { + const WordIdArrayView prevWordIds) { structurePolicy->iterateNgramEntries(prevWordIds, this /* listener */); } @@ -83,16 +83,13 @@ void MultiBigramMap::BigramMap::onVisitEntry(const int ngramProbability, const i void MultiBigramMap::addBigramsForWord( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds) { - if (prevWordIds) { - mBigramMaps[prevWordIds[0]].init(structurePolicy, prevWordIds); - } + const WordIdArrayView prevWordIds) { + mBigramMaps[prevWordIds[0]].init(structurePolicy, prevWordIds); } int MultiBigramMap::readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds, const int nextWordId, - const int unigramProbability) { + const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability) { const int bigramProbability = structurePolicy->getProbabilityOfWord(prevWordIds, nextWordId); if (bigramProbability != NOT_A_PROBABILITY) { return bigramProbability; diff --git a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h index a8c4ded57..d2eb5cc32 100644 --- a/native/jni/src/suggest/core/dictionary/multi_bigram_map.h +++ b/native/jni/src/suggest/core/dictionary/multi_bigram_map.h @@ -25,6 +25,7 @@ #include "suggest/core/dictionary/bloom_filter.h" #include "suggest/core/dictionary/ngram_listener.h" #include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" +#include "utils/int_array_view.h" namespace latinime { @@ -39,7 +40,7 @@ class MultiBigramMap { // Look up the bigram probability for the given word pair from the cached bigram maps. // Also caches the bigrams if there is space remaining and they have not been cached already. int getBigramProbability(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds, const int nextWordId, const int unigramProbability); + const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability); void clear() { mBigramMaps.clear(); @@ -57,7 +58,7 @@ class MultiBigramMap { virtual ~BigramMap() {} void init(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds); + const WordIdArrayView prevWordIds); int getBigramProbability( const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nextWordId, const int unigramProbability) const; @@ -70,11 +71,11 @@ class MultiBigramMap { }; void addBigramsForWord(const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds); + const WordIdArrayView prevWordIds); int readBigramProbabilityFromBinaryDictionary( const DictionaryStructureWithBufferPolicy *const structurePolicy, - const int *const prevWordIds, const int nextWordId, const int unigramProbability); + const WordIdArrayView prevWordIds, const int nextWordId, const int unigramProbability); static const size_t MAX_CACHED_PREV_WORDS_IN_BIGRAM_MAP; std::unordered_map mBigramMaps; diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h index 7414f696c..a498b6f65 100644 --- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h +++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h @@ -58,15 +58,15 @@ class DictionaryStructureWithBufferPolicy { virtual int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const = 0; - virtual const WordAttributes getWordAttributesInContext(const int *const prevWordIds, + virtual const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const = 0; // TODO: Remove virtual int getProbability(const int unigramProbability, const int bigramProbability) const = 0; - virtual int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const = 0; + virtual int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const = 0; - virtual void iterateNgramEntries(const int *const prevWordIds, + virtual void iterateNgramEntries(const WordIdArrayView prevWordIds, NgramListener *const listener) const = 0; virtual BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const = 0; diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index d4d4d1eed..4f58c0c54 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,7 +35,7 @@ void DicTraverseSession::init(const Dictionary *const dictionary, mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; - prevWordsInfo->getPrevWordIds(getDictionaryStructurePolicy(), mPrevWordsIds, + prevWordsInfo->getPrevWordIds(getDictionaryStructurePolicy(), mPrevWordsIds.data(), true /* tryLowerCaseSearch */); } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.h b/native/jni/src/suggest/core/session/dic_traverse_session.h index 0e676d897..af199071e 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.h +++ b/native/jni/src/suggest/core/session/dic_traverse_session.h @@ -24,6 +24,7 @@ #include "suggest/core/dicnode/dic_nodes_cache.h" #include "suggest/core/dictionary/multi_bigram_map.h" #include "suggest/core/layout/proximity_info_state.h" +#include "utils/int_array_view.h" namespace latinime { @@ -55,9 +56,7 @@ class DicTraverseSession { mMultiWordCostMultiplier(1.0f) { // NOTE: mProximityInfoStates is an array of instances. // No need to initialize it explicitly here. - for (size_t i = 0; i < NELEMS(mPrevWordsIds); ++i) { - mPrevWordsIds[i] = NOT_A_DICT_POS; - } + mPrevWordsIds.fill(NOT_A_DICT_POS); } // Non virtual inline destructor -- never inherit this class @@ -79,7 +78,7 @@ class DicTraverseSession { //-------------------- const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } - const int *getPrevWordIds() const { return mPrevWordsIds; } + const WordIdArrayView getPrevWordIds() const { return IntArrayView::fromArray(mPrevWordsIds); } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } const ProximityInfoState *getProximityInfoState(int id) const { @@ -166,7 +165,7 @@ class DicTraverseSession { const int *const inputYs, const int *const times, const int *const pointerIds, const int inputSize, const float maxSpatialDistance, const int maxPointerCount); - int mPrevWordsIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; + WordIdArray mPrevWordsIds; const ProximityInfo *mProximityInfo; const Dictionary *mDictionary; const SuggestOptions *mSuggestOptions; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp index 9b8a50b07..dfc3d2d9b 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp @@ -116,7 +116,7 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, } const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( - const int *const prevWordIds, const int wordId, + const WordIdArrayView prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const { if (wordId == NOT_A_WORD_ID) { return WordAttributes(); @@ -128,7 +128,7 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( prevWordIds, wordId, ptNodeParams.getProbability()); return getWordAttributes(probability, ptNodeParams); } - if (prevWordIds) { + if (!prevWordIds.empty()) { const int probability = getProbabilityOfWord(prevWordIds, wordId); if (probability != NOT_A_PROBABILITY) { return getWordAttributes(probability, ptNodeParams); @@ -160,7 +160,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, } } -int Ver4PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, +int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const { if (wordId == NOT_A_WORD_ID) { return NOT_A_PROBABILITY; @@ -170,7 +170,7 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } - if (prevWordIds) { + if (!prevWordIds.empty()) { const int bigramsPosition = getBigramsPositionOfPtNode( getTerminalPtNodePosFromWordId(prevWordIds[0])); BinaryDictionaryBigramsIterator bigramsIt(&mBigramPolicy, bigramsPosition); @@ -186,9 +186,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } -void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordIds, +void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds, NgramListener *const listener) const { - if (!prevWordIds) { + if (prevWordIds.empty()) { return; } const int bigramsPosition = getBigramsPositionOfPtNode( diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h index 871b556e1..576d2abb5 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.h @@ -91,14 +91,15 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; - const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, - MultiBigramMap *const multiBigramMap) const; + const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds, + const int wordId, MultiBigramMap *const multiBigramMap) const; int getProbability(const int unigramProbability, const int bigramProbability) const; - int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; + int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const; - void iterateNgramEntries(const int *const prevWordIds, NgramListener *const listener) const; + void iterateNgramEntries(const WordIdArrayView prevWordIds, + NgramListener *const listener) const; BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp index e76bae97c..aae61afca 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp @@ -282,8 +282,9 @@ int PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, return getWordIdFromTerminalPtNodePos(ptNodePos); } -const WordAttributes PatriciaTriePolicy::getWordAttributesInContext(const int *const prevWordIds, - const int wordId, MultiBigramMap *const multiBigramMap) const { +const WordAttributes PatriciaTriePolicy::getWordAttributesInContext( + const WordIdArrayView prevWordIds, const int wordId, + MultiBigramMap *const multiBigramMap) const { if (wordId == NOT_A_WORD_ID) { return WordAttributes(); } @@ -295,7 +296,7 @@ const WordAttributes PatriciaTriePolicy::getWordAttributesInContext(const int *c prevWordIds, wordId, ptNodeParams.getProbability()); return getWordAttributes(probability, ptNodeParams); } - if (prevWordIds) { + if (!prevWordIds.empty()) { const int bigramProbability = getProbabilityOfWord(prevWordIds, wordId); if (bigramProbability != NOT_A_PROBABILITY) { return getWordAttributes(bigramProbability, ptNodeParams); @@ -327,7 +328,8 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability, } } -int PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, const int wordId) const { +int PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, + const int wordId) const { if (wordId == NOT_A_WORD_ID) { return NOT_A_PROBABILITY; } @@ -340,7 +342,7 @@ int PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, const // for shortcuts). return NOT_A_PROBABILITY; } - if (prevWordIds) { + if (!prevWordIds.empty()) { const int bigramsPosition = getBigramsPositionOfPtNode( getTerminalPtNodePosFromWordId(prevWordIds[0])); BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramsPosition); @@ -356,9 +358,9 @@ int PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, const return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } -void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordIds, +void PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds, NgramListener *const listener) const { - if (!prevWordIds) { + if (prevWordIds.empty()) { return; } const int bigramsPosition = getBigramsPositionOfPtNode( @@ -371,8 +373,7 @@ void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordIds, } } -BinaryDictionaryShortcutIterator PatriciaTriePolicy::getShortcutIterator( - const int wordId) const { +BinaryDictionaryShortcutIterator PatriciaTriePolicy::getShortcutIterator(const int wordId) const { const int shortcutPos = getShortcutPositionOfPtNode(getTerminalPtNodePosFromWordId(wordId)); return BinaryDictionaryShortcutIterator(&mShortcutListPolicy, shortcutPos); } diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h index 8c1665d7d..fc65de58c 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h @@ -66,14 +66,15 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; - const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, - MultiBigramMap *const multiBigramMap) const; + const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds, + const int wordId, MultiBigramMap *const multiBigramMap) const; int getProbability(const int unigramProbability, const int bigramProbability) const; - int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; + int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const; - void iterateNgramEntries(const int *const prevWordIds, NgramListener *const listener) const; + void iterateNgramEntries(const WordIdArrayView prevWordIds, + NgramListener *const listener) const; BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp index 0472a453a..5b1907f49 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.cpp @@ -16,6 +16,7 @@ #include "suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h" +#include #include #include "suggest/core/dicnode/dic_node.h" @@ -111,7 +112,7 @@ int Ver4PatriciaTriePolicy::getWordId(const CodePointArrayView wordCodePoints, } const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( - const int *const prevWordIds, const int wordId, + const WordIdArrayView prevWordIds, const int wordId, MultiBigramMap *const multiBigramMap) const { if (wordId == NOT_A_WORD_ID) { return WordAttributes(); @@ -121,27 +122,11 @@ const WordAttributes Ver4PatriciaTriePolicy::getWordAttributesInContext( const PtNodeParams ptNodeParams = mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos); // TODO: Support n-gram. return WordAttributes(mBuffers->getLanguageModelDictContent()->getWordProbability( - WordIdArrayView::singleElementView(prevWordIds), wordId), ptNodeParams.isBlacklisted(), + prevWordIds.limit(1 /* maxSize */), wordId), ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(), ptNodeParams.getProbability() == 0); } -int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability, - const int bigramProbability) const { - if (mHeaderPolicy->isDecayingDict()) { - // Both probabilities are encoded. Decode them and get probability. - return ForgettingCurveUtils::getProbability(unigramProbability, bigramProbability); - } else { - if (unigramProbability == NOT_A_PROBABILITY) { - return NOT_A_PROBABILITY; - } else if (bigramProbability == NOT_A_PROBABILITY) { - return ProbabilityUtils::backoff(unigramProbability); - } else { - return bigramProbability; - } - } -} - -int Ver4PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, +int Ver4PatriciaTriePolicy::getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const { if (wordId == NOT_A_WORD_ID) { return NOT_A_PROBABILITY; @@ -152,22 +137,19 @@ int Ver4PatriciaTriePolicy::getProbabilityOfWord(const int *const prevWordIds, if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) { return NOT_A_PROBABILITY; } - if (prevWordIds) { - // TODO: Support n-gram. - const ProbabilityEntry probabilityEntry = - mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry( - IntArrayView::singleElementView(prevWordIds), wordId); - if (!probabilityEntry.isValid()) { - return NOT_A_PROBABILITY; - } - if (mHeaderPolicy->hasHistoricalInfoOfWords()) { - return ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(), - mHeaderPolicy); - } else { - return probabilityEntry.getProbability(); - } + // TODO: Support n-gram. + const ProbabilityEntry probabilityEntry = + mBuffers->getLanguageModelDictContent()->getNgramProbabilityEntry( + prevWordIds.limit(1 /* maxSize */), wordId); + if (!probabilityEntry.isValid()) { + return NOT_A_PROBABILITY; + } + if (mHeaderPolicy->hasHistoricalInfoOfWords()) { + return ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(), + mHeaderPolicy); + } else { + return probabilityEntry.getProbability(); } - return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY); } BinaryDictionaryShortcutIterator Ver4PatriciaTriePolicy::getShortcutIterator( @@ -176,15 +158,15 @@ BinaryDictionaryShortcutIterator Ver4PatriciaTriePolicy::getShortcutIterator( return BinaryDictionaryShortcutIterator(&mShortcutPolicy, shortcutPos); } -void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordIds, +void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordIds, NgramListener *const listener) const { - if (!prevWordIds) { + if (prevWordIds.empty()) { return; } // TODO: Support n-gram. const auto languageModelDictContent = mBuffers->getLanguageModelDictContent(); for (const auto entry : languageModelDictContent->getProbabilityEntries( - WordIdArrayView::singleElementView(prevWordIds))) { + prevWordIds.limit(1 /* maxSize */))) { const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry(); const int probability = probabilityEntry.hasHistoricalInfo() ? ForgettingCurveUtils::decodeProbability( @@ -321,8 +303,8 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI "length: %zd", bigramProperty->getTargetCodePoints()->size()); return false; } - int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - prevWordsInfo->getPrevWordIds(this, prevWordIds, false /* tryLowerCaseSearch */); + WordIdArray prevWordIds; + prevWordsInfo->getPrevWordIds(this, prevWordIds.data(), false /* tryLowerCaseSearch */); // TODO: Support N-gram. if (prevWordIds[0] == NOT_A_WORD_ID) { if (prevWordsInfo->isNthPrevWordBeginningOfSentence(1 /* n */)) { @@ -337,7 +319,7 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI return false; } // Refresh word ids. - prevWordsInfo->getPrevWordIds(this, prevWordIds, false /* tryLowerCaseSearch */); + prevWordsInfo->getPrevWordIds(this, prevWordIds.data(), false /* tryLowerCaseSearch */); } else { return false; } @@ -348,14 +330,14 @@ bool Ver4PatriciaTriePolicy::addNgramEntry(const PrevWordsInfo *const prevWordsI return false; } bool addedNewEntry = false; - int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - for (size_t i = 0; i < NELEMS(prevWordIds); ++i) { + WordIdArray prevWordsPtNodePos; + for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) { prevWordsPtNodePos[i] = mBuffers->getTerminalPositionLookupTable() ->getTerminalPtNodePosition(prevWordIds[i]); } const int wordPtNodePos = mBuffers->getTerminalPositionLookupTable() ->getTerminalPtNodePosition(wordId); - if (mUpdatingHelper.addNgramEntry(WordIdArrayView::fromFixedSizeArray(prevWordsPtNodePos), + if (mUpdatingHelper.addNgramEntry(WordIdArrayView::fromArray(prevWordsPtNodePos), wordPtNodePos, bigramProperty, &addedNewEntry)) { if (addedNewEntry) { mBigramCount++; @@ -385,8 +367,8 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor AKLOGE("word is too long to remove n-gram entry form the dictionary. length: %zd", wordCodePoints.size()); } - int prevWordIds[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - prevWordsInfo->getPrevWordIds(this, prevWordIds, false /* tryLowerCaseSerch */); + WordIdArray prevWordIds; + prevWordsInfo->getPrevWordIds(this, prevWordIds.data(), false /* tryLowerCaseSerch */); // TODO: Support N-gram. if (prevWordIds[0] == NOT_A_WORD_ID) { return false; @@ -395,14 +377,14 @@ bool Ver4PatriciaTriePolicy::removeNgramEntry(const PrevWordsInfo *const prevWor if (wordId == NOT_A_WORD_ID) { return false; } - int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; - for (size_t i = 0; i < NELEMS(prevWordIds); ++i) { + std::array prevWordsPtNodePos; + for (size_t i = 0; i < prevWordsPtNodePos.size(); ++i) { prevWordsPtNodePos[i] = mBuffers->getTerminalPositionLookupTable() ->getTerminalPtNodePosition(prevWordIds[i]); } const int wordPtNodePos = mBuffers->getTerminalPositionLookupTable() ->getTerminalPtNodePosition(wordId); - if (mUpdatingHelper.removeNgramEntry(WordIdArrayView::fromFixedSizeArray(prevWordsPtNodePos), + if (mUpdatingHelper.removeNgramEntry(WordIdArrayView::fromArray(prevWordsPtNodePos), wordPtNodePos)) { mBigramCount--; return true; diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h index 980c16e4a..a117a3614 100644 --- a/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h +++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v4/ver4_patricia_trie_policy.h @@ -68,14 +68,19 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy { int getWordId(const CodePointArrayView wordCodePoints, const bool forceLowerCaseSearch) const; - const WordAttributes getWordAttributesInContext(const int *const prevWordIds, const int wordId, - MultiBigramMap *const multiBigramMap) const; + const WordAttributes getWordAttributesInContext(const WordIdArrayView prevWordIds, + const int wordId, MultiBigramMap *const multiBigramMap) const; - int getProbability(const int unigramProbability, const int bigramProbability) const; + // TODO: Remove + int getProbability(const int unigramProbability, const int bigramProbability) const { + // Not used. + return NOT_A_PROBABILITY; + } - int getProbabilityOfWord(const int *const prevWordIds, const int wordId) const; + int getProbabilityOfWord(const WordIdArrayView prevWordIds, const int wordId) const; - void iterateNgramEntries(const int *const prevWordIds, NgramListener *const listener) const; + void iterateNgramEntries(const WordIdArrayView prevWordIds, + NgramListener *const listener) const; BinaryDictionaryShortcutIterator getShortcutIterator(const int wordId) const; diff --git a/native/jni/src/utils/int_array_view.h b/native/jni/src/utils/int_array_view.h index c39add9fe..caa13d976 100644 --- a/native/jni/src/utils/int_array_view.h +++ b/native/jni/src/utils/int_array_view.h @@ -57,9 +57,9 @@ class IntArrayView { explicit IntArrayView(const std::vector &vector) : mPtr(vector.data()), mSize(vector.size()) {} - template - AK_FORCE_INLINE static IntArrayView fromFixedSizeArray(const int (&array)[N]) { - return IntArrayView(array, N); + template + AK_FORCE_INLINE static IntArrayView fromArray(const std::array &array) { + return IntArrayView(array.data(), array.size()); } // Returns a view that points one int object. @@ -120,6 +120,8 @@ class IntArrayView { using WordIdArrayView = IntArrayView; using PtNodePosArrayView = IntArrayView; using CodePointArrayView = IntArrayView; +template +using WordIdArray = std::array; } // namespace latinime #endif // LATINIME_MEMORY_VIEW_H diff --git a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp index e6f0353e3..7608b45c2 100644 --- a/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp +++ b/native/jni/tests/suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content_test.cpp @@ -18,6 +18,7 @@ #include +#include #include #include "utils/int_array_view.h" @@ -97,8 +98,8 @@ TEST(LanguageModelDictContentTest, TestGetWordProbability) { const int bigramProbability = 20; const int trigramProbability = 30; const int wordId = 100; - const int prevWordIdArray[] = { 1, 2 }; - const WordIdArrayView prevWordIds = WordIdArrayView::fromFixedSizeArray(prevWordIdArray); + const std::array prevWordIdArray = {{ 1, 2 }}; + const WordIdArrayView prevWordIds = WordIdArrayView::fromArray(prevWordIdArray); const ProbabilityEntry probabilityEntry(flag, probability); languageModelDictContent.setProbabilityEntry(wordId, &probabilityEntry); diff --git a/native/jni/tests/utils/int_array_view_test.cpp b/native/jni/tests/utils/int_array_view_test.cpp index ec57cf59c..3bc294cdd 100644 --- a/native/jni/tests/utils/int_array_view_test.cpp +++ b/native/jni/tests/utils/int_array_view_test.cpp @@ -46,8 +46,8 @@ TEST(IntArrayViewTest, TestIteration) { TEST(IntArrayViewTest, TestConstructFromArray) { const size_t ARRAY_SIZE = 100; - int intArray[ARRAY_SIZE]; - const auto intArrayView = IntArrayView::fromFixedSizeArray(intArray); + std::array intArray; + const auto intArrayView = IntArrayView::fromArray(intArray); EXPECT_EQ(ARRAY_SIZE, intArrayView.size()); }