Merge "Move prev word related logic to PrevWordsInfo."
commit
c7ce8addf8
|
@ -48,21 +48,10 @@ BigramDictionary::~BigramDictionary() {
|
||||||
*/
|
*/
|
||||||
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
void BigramDictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||||
SuggestionResults *const outSuggestionResults) const {
|
SuggestionResults *const outSuggestionResults) const {
|
||||||
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
|
||||||
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
|
|
||||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
|
||||||
if (NOT_A_DICT_POS == pos) {
|
|
||||||
// If no bigrams for this exact word, search again in lower case.
|
|
||||||
pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
|
||||||
prevWordsInfo->getPrevWordCodePointCount(), true /* forceLowerCaseSearch */);
|
|
||||||
}
|
|
||||||
// If still no bigrams, we really don't have them!
|
|
||||||
if (NOT_A_DICT_POS == pos) return;
|
|
||||||
|
|
||||||
int unigramProbability = 0;
|
int unigramProbability = 0;
|
||||||
int bigramCodePoints[MAX_WORD_LENGTH];
|
int bigramCodePoints[MAX_WORD_LENGTH];
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(
|
BinaryDictionaryBigramsIterator bigramsIt =
|
||||||
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
|
||||||
|
@ -98,16 +87,11 @@ int BigramDictionary::getBigramListPositionForWord(const int *prevWord, const in
|
||||||
|
|
||||||
int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
int BigramDictionary::getBigramProbability(const PrevWordsInfo *const prevWordsInfo,
|
||||||
const int *word1, int length1) const {
|
const int *word1, int length1) const {
|
||||||
int pos = getBigramListPositionForWord(prevWordsInfo->getPrevWordCodePoints(),
|
|
||||||
prevWordsInfo->getPrevWordCodePointCount(), false /* forceLowerCaseSearch */);
|
|
||||||
// getBigramListPositionForWord returns 0 if this word isn't in the dictionary or has no bigrams
|
|
||||||
if (NOT_A_DICT_POS == pos) return NOT_A_PROBABILITY;
|
|
||||||
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
|
int nextWordPos = mDictionaryStructurePolicy->getTerminalPtNodePositionOfWord(word1, length1,
|
||||||
false /* forceLowerCaseSearch */);
|
false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt =
|
||||||
BinaryDictionaryBigramsIterator bigramsIt(
|
prevWordsInfo->getBigramsIteratorForPrediction(mDictionaryStructurePolicy);
|
||||||
mDictionaryStructurePolicy->getBigramsStructurePolicy(), pos);
|
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
if (bigramsIt.getBigramPos() == nextWordPos
|
if (bigramsIt.getBigramPos() == nextWordPos
|
||||||
|
|
|
@ -30,6 +30,11 @@ class BinaryDictionaryBigramsIterator {
|
||||||
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
|
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY),
|
||||||
mHasNext(pos != NOT_A_DICT_POS) {}
|
mHasNext(pos != NOT_A_DICT_POS) {}
|
||||||
|
|
||||||
|
BinaryDictionaryBigramsIterator(BinaryDictionaryBigramsIterator &&bigramsIterator)
|
||||||
|
: mBigramsStructurePolicy(bigramsIterator.mBigramsStructurePolicy),
|
||||||
|
mPos(bigramsIterator.mPos), mBigramPos(bigramsIterator.mBigramPos),
|
||||||
|
mProbability(bigramsIterator.mProbability), mHasNext(bigramsIterator.mHasNext) {}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool hasNext() const {
|
AK_FORCE_INLINE bool hasNext() const {
|
||||||
return mHasNext;
|
return mHasNext;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,21 +35,8 @@ void DicTraverseSession::init(const Dictionary *const dictionary,
|
||||||
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
|
mMultiWordCostMultiplier = getDictionaryStructurePolicy()->getHeaderStructurePolicy()
|
||||||
->getMultiWordCostMultiplier();
|
->getMultiWordCostMultiplier();
|
||||||
mSuggestOptions = suggestOptions;
|
mSuggestOptions = suggestOptions;
|
||||||
if (!prevWordsInfo->getPrevWordCodePoints()) {
|
prevWordsInfo->getPrevWordsTerminalPtNodePos(
|
||||||
mPrevWordsPtNodePos[0] = NOT_A_DICT_POS;
|
getDictionaryStructurePolicy(), mPrevWordsPtNodePos);
|
||||||
return;
|
|
||||||
}
|
|
||||||
// TODO: merge following similar calls to getTerminalPosition into one case-insensitive call.
|
|
||||||
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
|
||||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
|
||||||
false /* forceLowerCaseSearch */);
|
|
||||||
if (mPrevWordsPtNodePos[0] == NOT_A_DICT_POS) {
|
|
||||||
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
|
||||||
// auto-capitalized words like "The [current_word]".
|
|
||||||
mPrevWordsPtNodePos[0] = getDictionaryStructurePolicy()->getTerminalPtNodePositionOfWord(
|
|
||||||
prevWordsInfo->getPrevWordCodePoints(), prevWordsInfo->getPrevWordCodePointCount(),
|
|
||||||
true /* forceLowerCaseSearch */);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
|
void DicTraverseSession::setupForGetSuggestions(const ProximityInfo *pInfo,
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#define LATINIME_PREV_WORDS_INFO_H
|
#define LATINIME_PREV_WORDS_INFO_H
|
||||||
|
|
||||||
#include "defines.h"
|
#include "defines.h"
|
||||||
|
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
|
||||||
|
#include "suggest/core/policy/dictionary_structure_with_buffer_policy.h"
|
||||||
|
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
|
@ -38,17 +40,64 @@ class PrevWordsInfo {
|
||||||
mPrevWordCodePointCount[0] = prevWordCodePointCount;
|
mPrevWordCodePointCount[0] = prevWordCodePointCount;
|
||||||
mIsBeginningOfSentence[0] = isBeginningOfSentence;
|
mIsBeginningOfSentence[0] = isBeginningOfSentence;
|
||||||
}
|
}
|
||||||
const int *getPrevWordCodePoints() const {
|
|
||||||
return mPrevWordCodePoints[0];
|
void getPrevWordsTerminalPtNodePos(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||||
|
int *const outPrevWordsTerminalPtNodePos) const {
|
||||||
|
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||||
|
outPrevWordsTerminalPtNodePos[i] = getTerminalPtNodePosOfWord(dictStructurePolicy,
|
||||||
|
mPrevWordCodePoints[i], mPrevWordCodePointCount[i],
|
||||||
|
mIsBeginningOfSentence[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int getPrevWordCodePointCount() const {
|
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
|
||||||
return mPrevWordCodePointCount[0];
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
|
||||||
|
int pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
|
||||||
|
mPrevWordCodePointCount[0], false /* forceLowerCaseSearch */);
|
||||||
|
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the
|
||||||
|
// dictionary or has no bigrams
|
||||||
|
if (NOT_A_DICT_POS == pos) {
|
||||||
|
// If no bigrams for this exact word, search again in lower case.
|
||||||
|
pos = getBigramListPositionForWord(dictStructurePolicy, mPrevWordCodePoints[0],
|
||||||
|
mPrevWordCodePointCount[0], true /* forceLowerCaseSearch */);
|
||||||
|
}
|
||||||
|
return BinaryDictionaryBigramsIterator(
|
||||||
|
dictStructurePolicy->getBigramsStructurePolicy(), pos);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
|
DISALLOW_COPY_AND_ASSIGN(PrevWordsInfo);
|
||||||
|
|
||||||
|
static int getTerminalPtNodePosOfWord(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||||
|
const int *const wordCodePoints, const int wordCodePointCount,
|
||||||
|
const bool isBeginningOfSentence) {
|
||||||
|
if (!dictStructurePolicy || !wordCodePoints) {
|
||||||
|
return NOT_A_DICT_POS;
|
||||||
|
}
|
||||||
|
const int wordPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||||
|
wordCodePoints, wordCodePointCount, false /* forceLowerCaseSearch */);
|
||||||
|
if (wordPtNodePos != NOT_A_DICT_POS) {
|
||||||
|
return wordPtNodePos;
|
||||||
|
}
|
||||||
|
// Check bigrams for lower-cased previous word if original was not found. Useful for
|
||||||
|
// auto-capitalized words like "The [current_word]".
|
||||||
|
return dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||||
|
wordCodePoints, wordCodePointCount, true /* forceLowerCaseSearch */);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int getBigramListPositionForWord(
|
||||||
|
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||||
|
const int *wordCodePoints, const int wordCodePointCount,
|
||||||
|
const bool forceLowerCaseSearch) {
|
||||||
|
if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS;
|
||||||
|
const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||||
|
wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
|
||||||
|
if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS;
|
||||||
|
return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos);
|
||||||
|
}
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||||
mPrevWordCodePoints[i] = nullptr;
|
mPrevWordCodePoints[i] = nullptr;
|
||||||
|
|
Loading…
Reference in New Issue