Merge "Move prev word related logic to PrevWordsInfo."

main
Keisuke Kuroyanagi 2014-05-19 01:37:41 +00:00 committed by Android (Google) Code Review
commit c7ce8addf8
4 changed files with 64 additions and 39 deletions

View File

@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() {
*/ */
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo, void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
SuggestionResults *const outSuggestionResults) const { SuggestionResults *const outSuggestionResults) const {
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) {
// If no bigrams for this exact word, search again in lower case.
pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
}
// If still no bigrams, we really don't have them!
if (NOT_A_DICT_POS == pos) return;
int unigramProbability = 0; int unigramProbability = 0;
int bigramCodePoints[MAX_WORD_LENGTH]; int bigramCodePoints[MAX_WORD_LENGTH];
BinaryDictionaryBigramsIterator bigramsIt( BinaryDictionaryBigramsIterator bigramsIt =
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos); prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo, int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
const int *word1, int length1) const { const int *word1, int length1) const {
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1, int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
false /* forceLowerCaseSearch */); false /* forceLowerCaseSearch */);
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY; if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
BinaryDictionaryBigramsIterator bigramsIt =
BinaryDictionaryBigramsIterator bigramsIt( prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPos if (bigramsIt.getBigramPos() == nextWordPos

View File

@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator {
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
mHasNext(pos != NOT_A_DICT_POS) {} mHasNext(pos != NOT_A_DICT_POS) {}
// Move constructor. Initializes every member of the new iterator from the
// corresponding member of bigramsIterator; the source iterator is left unmodified.
// Declared noexcept so that callers and standard-library facilities that check
// for a non-throwing move can rely on it.
// NOTE(review): assumes all five members are pointers or scalar values whose
// copy cannot throw -- confirm against the member declarations.
BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator) noexcept
        : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy),
          mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos),
          mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {}
AK_FORCE_INLINE bool hasNext() const { AK_FORCE_INLINE bool hasNext() const {
return mHasNext; return mHasNext;
} }

View File

@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary,
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy() mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
->getMultiWordCostMultiplier(); ->getMultiWordCostMultiplier();
mSuggestOptions = suggestOptions; mSuggestOptions = suggestOptions;
if (!prevWordsInfo->getPrevWordCodePoints()) { prevWordsInfo->getPrevWordsTerminalPtNodePos(
mPrevWordsPtNodePos[0] = NOT_A_DICT_POS; getDictionaryStructurePolicy(), mPrevWordsPtNodePos);
return;
}
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
false /* forceLowerCaseSearch */);
if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
// Check bigrams for lower-cased previous word if original was not found. Useful for
// auto-capitalized words like "The [current_word]".
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
true /* forceLowerCaseSearch */);
}
} }
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo, void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,

View File

@ -18,6 +18,8 @@
#define LATINIME_PREV_WORDS_INFO_H #define LATINIME_PREV_WORDS_INFO_H
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
namespace latinime { namespace latinime {
@ -38,17 +40,64 @@ class PrevWordsInfo {
mPrevWordCodePointCount[0] = prevWordCodePointCount; mPrevWordCodePointCount[0] = prevWordCodePointCount;
mIsBeginningOfSentence[0] = isBeginningOfSentence; mIsBeginningOfSentence[0] = isBeginningOfSentence;
} }
const int *getPrevWordCodePoints() const {
return mPrevWordCodePoints[0]; void getPrevWordsTerminalPtNodePos(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
int *const outPrevWordsTerminalPtNodePos) const {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
mIsBeginningOfSentence[i]);
}
} }
int getPrevWordCodePointCount() const { BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
return mPrevWordCodePointCount[0]; const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
// dictionary or has no bigrams
if (NOT_A_DICT_POS == pos) {
// If no bigrams for this exact word, search again in lower case.
pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */);
}
return BinaryDictionaryBigramsIterator(
dictStructurePolicy->getBigramsStructurePolicy(), pos);
} }
private: private:
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo); DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
// Looks up the terminal PtNode position of a word in the given dictionary.
//
// The word is first searched with its original casing. If that lookup yields
// NOT_A_DICT_POS, the search is repeated with forceLowerCaseSearch enabled, which
// helps with auto-capitalized previous words such as "The [current_word]".
//
// Returns NOT_A_DICT_POS when dictStructurePolicy or wordCodePoints is null;
// otherwise returns the result of the exact-case lookup, or of the lower-case
// lookup when the exact-case one fails.
// Note: isBeginningOfSentence is accepted but not used by this function body.
static int getTerminalPtNodePosOfWord(
        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
        const int *const wordCodePoints, const int wordCodePointCount,
        const bool isBeginningOfSentence) {
    if (dictStructurePolicy == nullptr || wordCodePoints == nullptr) {
        return NOT_A_DICT_POS;
    }
    const int exactCasePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
            wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
    // Only fall back to the lower-cased search when the exact-case search failed.
    return (exactCasePos == NOT_A_DICT_POS)
            ? dictStructurePolicy->getTerminalPtNodePositionOfWord(
                      wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */)
            : exactCasePos;
}
// Resolves the position of the bigram list attached to a word.
//
// Returns NOT_A_DICT_POS when wordCodePoints is null, when wordCodePointCount is
// not positive, or when the word has no terminal PtNode in the dictionary.
// Otherwise returns whatever getBigramsPositionOfPtNode() reports for the word's
// terminal PtNode.
// Note: dictStructurePolicy is dereferenced without a null check here.
static int getBigramListPositionForWord(
        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
        const int *wordCodePoints, const int wordCodePointCount,
        const bool forceLowerCaseSearch) {
    const bool hasWord = (wordCodePoints != nullptr) && (wordCodePointCount > 0);
    if (!hasWord) {
        return NOT_A_DICT_POS;
    }
    const int ptNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
            wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
    // A word that is not in the dictionary cannot have a bigram list.
    return (ptNodePos != NOT_A_DICT_POS)
            ? dictStructurePolicy->getBigramsPositionOfPtNode(ptNodePos)
            : NOT_A_DICT_POS;
}
void clear() { void clear() {
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) { for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
mPrevWordCodePoints[i] = nullptr; mPrevWordCodePoints[i] = nullptr;