From 45d1a936a7a318286c4404951db1bd825e25cc7c Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Fri, 16 May 2014 22:31:45 +0900 Subject: [PATCH] Move prev word related logic to PrevWordsInfo. Bug: 14119293 Bug: 14425059 Change-Id: I1bbd7ab4ace2c475f27bc468cb7b4d67e1ae2f9f --- .../core/dictionary/bigram_dictionary.cpp | 24 ++------ .../binary_dictionary_bigrams_iterator.h | 5 ++ .../core/session/dic_traverse_session.cpp | 17 +----- .../suggest/core/session/prev_words_info.h | 57 +++++++++++++++++-- 4 files changed, 64 insertions(+), 39 deletions(-) diff --git a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp index 847fa1b02..295e760d6 100644 --- a/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp +++ b/native/jni/src/suggest/core/dictionary/bigram_dictionary.cpp @@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() { */ void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, SuggestionResults *const outSuggestionResults) const { - int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), - prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); - // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams - if (NOT_A_DICT_POS == pos) { - // If no bigrams for this exact word, search again in lower case. - pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), - prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */); - } - // If still no bigrams, we really don't have them! - if (NOT_A_DICT_POS == pos) return; - int unigramProbability = 0; int bigramCodePoints[MAX_WORD_LENGTH]; - BinaryDictionaryBigramsIterator bigramsIt( - mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt = + prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { @@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, const int *word1, int length1) const { - int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(), - prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */); - // getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams - if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY; int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, false /* forceLowerCaseSearch */); if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; - - BinaryDictionaryBigramsIterator bigramsIt( - mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); + BinaryDictionaryBigramsIterator bigramsIt = + prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy); while (bigramsIt.hasNext()) { bigramsIt.next(); if (bigramsIt.getBigramPos() == nextWordPos diff --git a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h index d16ac47fe..bc9d57671 100644 --- a/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h +++ b/native/jni/src/suggest/core/dictionary/binary_dictionary_bigrams_iterator.h @@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator { mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(pos != NOT_A_DICT_POS) {} + BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator) + : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy), + mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos), + mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {} + AK_FORCE_INLINE bool hasNext() const { return mHasNext; } diff --git a/native/jni/src/suggest/core/session/dic_traverse_session.cpp b/native/jni/src/suggest/core/session/dic_traverse_session.cpp index b9e9db719..dc2b66a2c 100644 --- a/native/jni/src/suggest/core/session/dic_traverse_session.cpp +++ b/native/jni/src/suggest/core/session/dic_traverse_session.cpp @@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary, mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() ->getMultiWordCostMultiplier(); mSuggestOptions = suggestOptions; - if (!prevWordsInfo->getPrevWordCodePoints()) { - mPrevWordsPtNodePos[0] = NOT_A_DICT_POS; - return; - } - // TODO: merge following similar calls to getTerminalPosition into one case-insensitive call. - mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( - prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), - false /* forceLowerCaseSearch */); - if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) { - // Check bigrams for lower-cased previous word if original was not found. Useful for - // auto-capitalized words like "The [current_word]". - mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord( - prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(), - true /* forceLowerCaseSearch */); - } + prevWordsInfo->getPrevWordsTerminalPtNodePos( + getDictionaryStructurePolicy(), mPrevWordsPtNodePos); } void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, diff --git a/native/jni/src/suggest/core/session/prev_words_info.h b/native/jni/src/suggest/core/session/prev_words_info.h index bc685945e..70a99ef38 100644 --- a/native/jni/src/suggest/core/session/prev_words_info.h +++ b/native/jni/src/suggest/core/session/prev_words_info.h @@ -18,6 +18,8 @@ #define LATINIME_PREV_WORDS_INFO_H #include "defines.h" +#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h" +#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h" namespace latinime { @@ -38,17 +40,64 @@ class PrevWordsInfo { mPrevWordCodePointCount[0] = prevWordCodePointCount; mIsBeginningOfSentence[0] = isBeginningOfSentence; } - const int *getPrevWordCodePoints() const { - return mPrevWordCodePoints[0]; + + void getPrevWordsTerminalPtNodePos( + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, + int *const outPrevWordsTerminalPtNodePos) const { + for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { + outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy, + mPrevWordCodePoints[i], mPrevWordCodePointCount[i], + mIsBeginningOfSentence[i]); + } } - int getPrevWordCodePointCount() const { - return mPrevWordCodePointCount[0]; + BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction( + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const { + int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], + mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */); + // getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the + // dictionary or has no bigrams + if (NOT_A_DICT_POS == pos) { + // If no bigrams for this exact word, search again in lower case. + pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0], + mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */); + } + return BinaryDictionaryBigramsIterator( + dictStructurePolicy->getBigramsStructurePolicy(), pos); } private: DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); + static int getTerminalPtNodePosOfWord( + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, + const int *const wordCodePoints, const int wordCodePointCount, + const bool isBeginningOfSentence) { + if (!dictStructurePolicy || !wordCodePoints) { + return NOT_A_DICT_POS; + } + const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( + wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */); + if (wordPtNodePos != NOT_A_DICT_POS) { + return wordPtNodePos; + } + // Check bigrams for lower-cased previous word if original was not found. Useful for + // auto-capitalized words like "The [current_word]". + return dictStructurePolicy->getTerminalPtNodePositionOfWord( + wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */); + } + + static int getBigramListPositionForWord( + const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, + const int *wordCodePoints, const int wordCodePointCount, + const bool forceLowerCaseSearch) { + if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS; + const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( + wordCodePoints, wordCodePointCount, forceLowerCaseSearch); + if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS; + return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos); + } + void clear() { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { mPrevWordCodePoints[i] = nullptr;