Merge "Remove PrevWordsInfo.getBigramsIteratorForPrediction()." into lmp-dev
This commit is contained in:
commit
a704ad254e
9 changed files with 46 additions and 76 deletions
|
@ -92,7 +92,11 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
|||
TimeKeeper::setCurrentTime();
|
||||
NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
|
||||
mDictionaryStructureWithBufferPolicy.get());
|
||||
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsInfo, &listener);
|
||||
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
prevWordsInfo->getPrevWordsTerminalPtNodePos(
|
||||
mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
|
||||
true /* tryLowerCaseSearch */);
|
||||
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsPtNodePos, &listener);
|
||||
}
|
||||
|
||||
int Dictionary::getProbability(const int *word, int length) const {
|
||||
|
@ -111,7 +115,15 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co
|
|||
int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
|
||||
length, false /* forceLowerCaseSearch */);
|
||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsInfo, nextWordPos);
|
||||
if (!prevWordsInfo) {
|
||||
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
|
||||
nullptr /* prevWordsPtNodePos */, nextWordPos);
|
||||
}
|
||||
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
prevWordsInfo->getPrevWordsTerminalPtNodePos(
|
||||
mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
|
||||
true /* tryLowerCaseSearch */);
|
||||
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPos);
|
||||
}
|
||||
|
||||
bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
||||
|
|
|
@ -59,10 +59,10 @@ class DictionaryStructureWithBufferPolicy {
|
|||
virtual int getProbability(const int unigramProbability,
|
||||
const int bigramProbability) const = 0;
|
||||
|
||||
virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||
virtual int getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||
const int nodePos) const = 0;
|
||||
|
||||
virtual void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
virtual void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||
NgramListener *const listener) const = 0;
|
||||
|
||||
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
||||
|
|
|
@ -90,13 +90,6 @@ class PrevWordsInfo {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns a bigram iterator for the entry stored at index 0 of the
// previous-word arrays (mPrevWordCodePoints, mPrevWordCodePointCount,
// mIsBeginningOfSentence).
//
// dictStructurePolicy: dictionary policy used for the lookup; it is passed
//     through unchanged to getBigramsIteratorForWordWithTryingLowerCaseSearch(),
//     which performs the actual search and supplies the return value.
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
|
||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
|
||||
return getBigramsIteratorForWordWithTryingLowerCaseSearch(
|
||||
dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
|
||||
mIsBeginningOfSentence[0]);
|
||||
}
|
||||
|
||||
// n is 1-indexed.
|
||||
const int *getNthPrevWordCodePoints(const int n) const {
|
||||
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
||||
|
@ -154,46 +147,6 @@ class PrevWordsInfo {
|
|||
codePoints, codePointCount, true /* forceLowerCaseSearch */);
|
||||
}
|
||||
|
||||
// Looks up the bigram iterator for a word, first with the code points as
// given and, if that iterator has no entries, a second time with
// forceLowerCaseSearch set to true.
//
// dictStructurePolicy: dictionary policy used for the lookup.
// wordCodePoints / wordCodePointCount: code points of the word to search for.
// isBeginningOfSentence: when true, a beginning-of-sentence marker is attached
//     to a local copy of the code points before searching.
//
// Returns a default-constructed BinaryDictionaryBigramsIterator when
// dictStructurePolicy or wordCodePoints is null, when wordCodePointCount
// exceeds MAX_WORD_LENGTH, or when attaching the marker yields a count <= 0.
// NOTE(review): only the upper bound of wordCodePointCount is checked before
// the memmove below; this assumes the count is never negative - TODO confirm
// against callers.
static BinaryDictionaryBigramsIterator getBigramsIteratorForWordWithTryingLowerCaseSearch(
|
||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||
const int *const wordCodePoints, const int wordCodePointCount,
|
||||
const bool isBeginningOfSentence) {
|
||||
if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
|
||||
return BinaryDictionaryBigramsIterator();
|
||||
}
|
||||
// Work on a local copy: the input is const, and the marker attachment below
// writes into the buffer it is given.
int codePoints[MAX_WORD_LENGTH];
|
||||
int codePointCount = wordCodePointCount;
|
||||
memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
|
||||
if (isBeginningOfSentence) {
|
||||
// The helper returns the updated count; a non-positive result is treated as
// failure and ends the lookup.
codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
|
||||
codePointCount, MAX_WORD_LENGTH);
|
||||
if (codePointCount <= 0) {
|
||||
return BinaryDictionaryBigramsIterator();
|
||||
}
|
||||
}
|
||||
// First attempt: search without forcing lower case.
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorForWord(dictStructurePolicy,
|
||||
codePoints, codePointCount, false /* forceLowerCaseSearch */);
|
||||
// getBigramsIteratorForWord returns an empty iterator if this word isn't in the dictionary
|
||||
// or has no bigrams.
|
||||
if (bigramsIt.hasNext()) {
|
||||
return bigramsIt;
|
||||
}
|
||||
// If no bigrams for this exact word, search again in lower case.
|
||||
return getBigramsIteratorForWord(dictStructurePolicy, codePoints,
|
||||
codePointCount, true /* forceLowerCaseSearch */);
|
||||
}
|
||||
|
||||
// Resolves a word to its terminal PtNode position and returns the bigram
// iterator of that node.
//
// dictStructurePolicy: dictionary policy queried for the terminal position and
//     for the iterator. NOTE(review): it is dereferenced without a null check
//     in this function - presumably callers validate it first; confirm.
// wordCodePoints / wordCodePointCount: code points of the word to look up.
// forceLowerCaseSearch: forwarded to getTerminalPtNodePositionOfWord().
//
// Returns a default-constructed BinaryDictionaryBigramsIterator when
// wordCodePoints is null, wordCodePointCount is <= 0, or the lookup yields
// NOT_A_DICT_POS.
static BinaryDictionaryBigramsIterator getBigramsIteratorForWord(
|
||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
||||
const int *wordCodePoints, const int wordCodePointCount,
|
||||
const bool forceLowerCaseSearch) {
|
||||
if (!wordCodePoints || wordCodePointCount <= 0) return BinaryDictionaryBigramsIterator();
|
||||
const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
||||
wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
|
||||
// The word was not found by the policy, so there is no node to read bigrams from.
if (NOT_A_DICT_POS == terminalPtNodePos) return BinaryDictionaryBigramsIterator();
|
||||
return dictStructurePolicy->getBigramsIteratorOfPtNode(terminalPtNodePos);
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||
mPrevWordCodePointCount[i] = 0;
|
||||
|
|
|
@ -132,7 +132,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
|||
}
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||
const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -141,9 +141,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
|||
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
if (prevWordsInfo) {
|
||||
if (prevWordsPtNodePos) {
|
||||
BinaryDictionaryBigramsIterator bigramsIt =
|
||||
prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
|
||||
getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == ptNodePos
|
||||
|
@ -156,10 +156,12 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
|||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||
NgramListener *const listener) const {
|
||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||
this /* dictStructurePolicy */);
|
||||
if (!prevWordsPtNodePos) {
|
||||
return;
|
||||
}
|
||||
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||
|
|
|
@ -90,10 +90,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||
const int ptNodePos) const;
|
||||
int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
|
||||
|
||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||
NgramListener *const listener) const;
|
||||
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
|
|
@ -297,7 +297,7 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
|||
}
|
||||
}
|
||||
|
||||
int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||
int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||
const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -310,9 +310,9 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
|
|||
// for shortcuts).
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
if (prevWordsInfo) {
|
||||
if (prevWordsPtNodePos) {
|
||||
BinaryDictionaryBigramsIterator bigramsIt =
|
||||
prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
|
||||
getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == ptNodePos
|
||||
|
@ -325,10 +325,12 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
|
|||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
void PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||
NgramListener *const listener) const {
|
||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||
this /* dictStructurePolicy */);
|
||||
if (!prevWordsPtNodePos) {
|
||||
return;
|
||||
}
|
||||
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||
|
|
|
@ -63,9 +63,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
||||
int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
|
||||
|
||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||
NgramListener *const listener) const;
|
||||
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
|
|
@ -122,7 +122,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
|||
}
|
||||
}
|
||||
|
||||
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
||||
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||
const int ptNodePos) const {
|
||||
if (ptNodePos == NOT_A_DICT_POS) {
|
||||
return NOT_A_PROBABILITY;
|
||||
|
@ -131,9 +131,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
|||
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
||||
return NOT_A_PROBABILITY;
|
||||
}
|
||||
if (prevWordsInfo) {
|
||||
if (prevWordsPtNodePos) {
|
||||
BinaryDictionaryBigramsIterator bigramsIt =
|
||||
prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
|
||||
getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
if (bigramsIt.getBigramPos() == ptNodePos
|
||||
|
@ -146,10 +146,12 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
|||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||
}
|
||||
|
||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||
NgramListener *const listener) const {
|
||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
||||
this /* dictStructurePolicy */);
|
||||
if (!prevWordsPtNodePos) {
|
||||
return;
|
||||
}
|
||||
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||
while (bigramsIt.hasNext()) {
|
||||
bigramsIt.next();
|
||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||
|
|
|
@ -72,9 +72,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
|||
|
||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||
|
||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
||||
int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
|
||||
|
||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
||||
void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||
NgramListener *const listener) const;
|
||||
|
||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||
|
|
Loading…
Reference in a new issue