Merge "Move prev word related logic to PrevWordsInfo."
This commit is contained in:
commit
c7ce8addf8
4 changed files with 64 additions and 39 deletions
|
@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() {
|
|||
*/
|
||||
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||
SuggestionResults *const outSuggestionResults) const {
|
||||
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
||||
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
|
||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||
if (NOT_A_DICT_POS == pos) {
|
||||
// If no bigrams for this exact word, search again in lower case.
|
||||
pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
||||
prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
|
||||
}
|
||||
// If still no bigrams, we really don't have them!
|
||||
if (NOT_A_DICT_POS == pos) return;
|
||||
|
||||
int unigramProbability = 0;
|
||||
int bigramCodePoints[MAX_WORD_LENGTH];
|
||||
BinaryDictionaryBigramsIterator bigramsIt(
|
||||
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt =
|
||||
prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
||||
|
@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
|||
|
||||
int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int *word1, int length1) const {
|
||||
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
||||
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
|
||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
||||
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
|
||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||
|
||||
BinaryDictionaryBigramsIterator bigramsIt(
|
||||
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
||||
BinaryDictionaryBigramsIterator bigramsIt =
|
||||
prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == nextWordPos
|
||||
|
|
|
@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator {
|
|||
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
|
||||
mHasNext(pos != NOT_A_DICT_POS) {}
|
||||
|
||||
// Move constructor. Initializes every member of the new iterator (structure policy, current
// position, current bigram position, probability and has-next flag) from the corresponding
// member of the source iterator.
// Declared noexcept because the body only copies member values.
// NOTE(review): assumes all five members are plain pointers/ints/bools whose copy cannot
// throw -- confirm against the member declarations.
BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator) noexcept
        : mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy),
          mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos),
          mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {}
|
||||
|
||||
// Returns the current value of the mHasNext flag; does not modify the iterator.
AK_FORCE_INLINE bool hasNext() const {
|
||||
return mHasNext;
|
||||
}
|
||||
|
|
|
@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary,
|
|||
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
|
||||
->getMultiWordCostMultiplier();
|
||||
mSuggestOptions = suggestOptions;
|
||||
if (!prevWordsInfo->getPrevWordCodePoints()) {
|
||||
mPrevWordsPtNodePos[0] = NOT_A_DICT_POS;
|
||||
return;
|
||||
}
|
||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
||||
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
||||
false /* forceLowerCaseSearch */);
|
||||
if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||
// auto-capitalized words like "The [current_word]".
|
||||
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
||||
true /* forceLowerCaseSearch */);
|
||||
}
|
||||
prevWordsInfo->getPrevWordsTerminalPtNodePos(
|
||||
getDictionaryStructurePolicy(), mPrevWordsPtNodePos);
|
||||
}
|
||||
|
||||
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#define LATINIME_PREV_WORDS_INFO_H
|
||||
|
||||
#include "defines.h"
|
||||
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||
|
||||
namespace latinime {
|
||||
|
||||
|
@ -38,17 +40,64 @@ class PrevWordsInfo {
|
|||
mPrevWordCodePointCount[0] = prevWordCodePointCount;
|
||||
mIsBeginningOfSentence[0] = isBeginningOfSentence;
|
||||
}
|
||||
const int *getPrevWordCodePoints() const {
|
||||
return mPrevWordCodePoints[0];
|
||||
|
||||
void getPrevWordsTerminalPtNodePos(
|
||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||
int *const outPrevWordsTerminalPtNodePos) const {
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||
outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
|
||||
mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
|
||||
mIsBeginningOfSentence[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int getPrevWordCodePointCount() const {
|
||||
return mPrevWordCodePointCount[0];
|
||||
// Builds a bigrams iterator for the first previous word (index 0).
//
// The bigram list position is looked up with the word exactly as stored; if that yields
// NOT_A_DICT_POS, the lookup is repeated with a forced lower-case search. The iterator is
// constructed with whatever position results, including NOT_A_DICT_POS when both lookups
// fail.
//
// dictStructurePolicy: dictionary structure policy used for the lookups and as the source of
//         the bigrams structure policy handed to the iterator.
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
        const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
    const int *const prevWordCodePoints = mPrevWordCodePoints[0];
    const int prevWordCodePointCount = mPrevWordCodePointCount[0];
    int bigramListPos = getBigramListPositionForWord(dictStructurePolicy, prevWordCodePoints,
            prevWordCodePointCount, false /* forceLowerCaseSearch */);
    if (bigramListPos == NOT_A_DICT_POS) {
        // Nothing for the exact spelling: fall back to the lower-cased word.
        bigramListPos = getBigramListPositionForWord(dictStructurePolicy, prevWordCodePoints,
                prevWordCodePointCount, true /* forceLowerCaseSearch */);
    }
    return BinaryDictionaryBigramsIterator(
            dictStructurePolicy->getBigramsStructurePolicy(), bigramListPos);
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
|
||||
|
||||
static int getTerminalPtNodePosOfWord(
|
||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||
const int *const wordCodePoints, const int wordCodePointCount,
|
||||
const bool isBeginningOfSentence) {
|
||||
if (!dictStructurePolicy || !wordCodePoints) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||
wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
|
||||
if (wordPtNodePos != NOT_A_DICT_POS) {
|
||||
return wordPtNodePos;
|
||||
}
|
||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||
// auto-capitalized words like "The [current_word]".
|
||||
return dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||
wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
|
||||
}
|
||||
|
||||
static int getBigramListPositionForWord(
|
||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||
const int *wordCodePoints, const int wordCodePointCount,
|
||||
const bool forceLowerCaseSearch) {
|
||||
if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS;
|
||||
const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||
wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
|
||||
if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS;
|
||||
return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||
mPrevWordCodePoints[i] = nullptr;
|
||||
|
|
Loading…
Reference in a new issue