Merge "Remove PrevWordsInfo.getBigramsIteratorForPrediction()." into lmp-dev
This commit is contained in:
commit
a704ad254e
9 changed files with 46 additions and 76 deletions
|
@ -92,7 +92,11 @@ void Dictionary::getPredictions(const PrevWordsInfo *const prevWordsInfo,
|
||||||
TimeKeeper::setCurrentTime();
|
TimeKeeper::setCurrentTime();
|
||||||
NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
|
NgramListenerForPrediction listener(prevWordsInfo, outSuggestionResults,
|
||||||
mDictionaryStructureWithBufferPolicy.get());
|
mDictionaryStructureWithBufferPolicy.get());
|
||||||
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsInfo, &listener);
|
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
|
prevWordsInfo->getPrevWordsTerminalPtNodePos(
|
||||||
|
mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
|
||||||
|
true /* tryLowerCaseSearch */);
|
||||||
|
mDictionaryStructureWithBufferPolicy->iterateNgramEntries(prevWordsPtNodePos, &listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
int Dictionary::getProbability(const int *word, int length) const {
|
int Dictionary::getProbability(const int *word, int length) const {
|
||||||
|
@ -111,7 +115,15 @@ int Dictionary::getNgramProbability(const PrevWordsInfo *const prevWordsInfo, co
|
||||||
int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
|
int nextWordPos = mDictionaryStructureWithBufferPolicy->getTerminalPtNodePositionOfWord(word,
|
||||||
length, false /* forceLowerCaseSearch */);
|
length, false /* forceLowerCaseSearch */);
|
||||||
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
if (NOT_A_DICT_POS == nextWordPos) return NOT_A_PROBABILITY;
|
||||||
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsInfo, nextWordPos);
|
if (!prevWordsInfo) {
|
||||||
|
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(
|
||||||
|
nullptr /* prevWordsPtNodePos */, nextWordPos);
|
||||||
|
}
|
||||||
|
int prevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||||
|
prevWordsInfo->getPrevWordsTerminalPtNodePos(
|
||||||
|
mDictionaryStructureWithBufferPolicy.get(), prevWordsPtNodePos,
|
||||||
|
true /* tryLowerCaseSearch */);
|
||||||
|
return getDictionaryStructurePolicy()->getProbabilityOfPtNode(prevWordsPtNodePos, nextWordPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
bool Dictionary::addUnigramEntry(const int *const word, const int length,
|
||||||
|
|
|
@ -59,10 +59,10 @@ class DictionaryStructureWithBufferPolicy {
|
||||||
virtual int getProbability(const int unigramProbability,
|
virtual int getProbability(const int unigramProbability,
|
||||||
const int bigramProbability) const = 0;
|
const int bigramProbability) const = 0;
|
||||||
|
|
||||||
virtual int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
virtual int getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||||
const int nodePos) const = 0;
|
const int nodePos) const = 0;
|
||||||
|
|
||||||
virtual void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
virtual void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const = 0;
|
NgramListener *const listener) const = 0;
|
||||||
|
|
||||||
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
|
||||||
|
|
|
@ -90,13 +90,6 @@ class PrevWordsInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
|
|
||||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
|
|
||||||
return getBigramsIteratorForWordWithTryingLowerCaseSearch(
|
|
||||||
dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
|
|
||||||
mIsBeginningOfSentence[0]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// n is 1-indexed.
|
// n is 1-indexed.
|
||||||
const int *getNthPrevWordCodePoints(const int n) const {
|
const int *getNthPrevWordCodePoints(const int n) const {
|
||||||
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
||||||
|
@ -154,46 +147,6 @@ class PrevWordsInfo {
|
||||||
codePoints, codePointCount, true /* forceLowerCaseSearch */);
|
codePoints, codePointCount, true /* forceLowerCaseSearch */);
|
||||||
}
|
}
|
||||||
|
|
||||||
static BinaryDictionaryBigramsIterator getBigramsIteratorForWordWithTryingLowerCaseSearch(
|
|
||||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
|
||||||
const int *const wordCodePoints, const int wordCodePointCount,
|
|
||||||
const bool isBeginningOfSentence) {
|
|
||||||
if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
|
|
||||||
return BinaryDictionaryBigramsIterator();
|
|
||||||
}
|
|
||||||
int codePoints[MAX_WORD_LENGTH];
|
|
||||||
int codePointCount = wordCodePointCount;
|
|
||||||
memmove(codePoints, wordCodePoints, sizeof(int) * codePointCount);
|
|
||||||
if (isBeginningOfSentence) {
|
|
||||||
codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
|
|
||||||
codePointCount, MAX_WORD_LENGTH);
|
|
||||||
if (codePointCount <= 0) {
|
|
||||||
return BinaryDictionaryBigramsIterator();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorForWord(dictStructurePolicy,
|
|
||||||
codePoints, codePointCount, false /* forceLowerCaseSearch */);
|
|
||||||
// getBigramsIteratorForWord returns an empty iterator if this word isn't in the dictionary
|
|
||||||
// or has no bigrams.
|
|
||||||
if (bigramsIt.hasNext()) {
|
|
||||||
return bigramsIt;
|
|
||||||
}
|
|
||||||
// If no bigrams for this exact word, search again in lower case.
|
|
||||||
return getBigramsIteratorForWord(dictStructurePolicy, codePoints,
|
|
||||||
codePointCount, true /* forceLowerCaseSearch */);
|
|
||||||
}
|
|
||||||
|
|
||||||
static BinaryDictionaryBigramsIterator getBigramsIteratorForWord(
|
|
||||||
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
|
|
||||||
const int *wordCodePoints, const int wordCodePointCount,
|
|
||||||
const bool forceLowerCaseSearch) {
|
|
||||||
if (!wordCodePoints || wordCodePointCount <= 0) return BinaryDictionaryBigramsIterator();
|
|
||||||
const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
|
|
||||||
wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
|
|
||||||
if (NOT_A_DICT_POS == terminalPtNodePos) return BinaryDictionaryBigramsIterator();
|
|
||||||
return dictStructurePolicy->getBigramsIteratorOfPtNode(terminalPtNodePos);
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear() {
|
void clear() {
|
||||||
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
for (size_t i = 0; i < NELEMS(mPrevWordCodePoints); ++i) {
|
||||||
mPrevWordCodePointCount[i] = 0;
|
mPrevWordCodePointCount[i] = 0;
|
||||||
|
|
|
@ -132,7 +132,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||||
const int ptNodePos) const {
|
const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
@ -141,9 +141,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
||||||
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
if (prevWordsInfo) {
|
if (prevWordsPtNodePos) {
|
||||||
BinaryDictionaryBigramsIterator bigramsIt =
|
BinaryDictionaryBigramsIterator bigramsIt =
|
||||||
prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
|
getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
if (bigramsIt.getBigramPos() == ptNodePos
|
if (bigramsIt.getBigramPos() == ptNodePos
|
||||||
|
@ -156,10 +156,12 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
||||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const {
|
NgramListener *const listener) const {
|
||||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
if (!prevWordsPtNodePos) {
|
||||||
this /* dictStructurePolicy */);
|
return;
|
||||||
|
}
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||||
|
|
|
@ -90,10 +90,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
|
||||||
const int ptNodePos) const;
|
|
||||||
|
|
||||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const;
|
NgramListener *const listener) const;
|
||||||
|
|
||||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
|
@ -297,7 +297,7 @@ int PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
int PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||||
const int ptNodePos) const {
|
const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
@ -310,9 +310,9 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
|
||||||
// for shortcuts).
|
// for shortcuts).
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
if (prevWordsInfo) {
|
if (prevWordsPtNodePos) {
|
||||||
BinaryDictionaryBigramsIterator bigramsIt =
|
BinaryDictionaryBigramsIterator bigramsIt =
|
||||||
prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
|
getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
if (bigramsIt.getBigramPos() == ptNodePos
|
if (bigramsIt.getBigramPos() == ptNodePos
|
||||||
|
@ -325,10 +325,12 @@ int PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWo
|
||||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
void PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
void PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const {
|
NgramListener *const listener) const {
|
||||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
if (!prevWordsPtNodePos) {
|
||||||
this /* dictStructurePolicy */);
|
return;
|
||||||
|
}
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||||
|
|
|
@ -63,9 +63,9 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
|
||||||
|
|
||||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const;
|
NgramListener *const listener) const;
|
||||||
|
|
||||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
|
@ -122,7 +122,7 @@ int Ver4PatriciaTriePolicy::getProbability(const int unigramProbability,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo,
|
int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const int *const prevWordsPtNodePos,
|
||||||
const int ptNodePos) const {
|
const int ptNodePos) const {
|
||||||
if (ptNodePos == NOT_A_DICT_POS) {
|
if (ptNodePos == NOT_A_DICT_POS) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
|
@ -131,9 +131,9 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
||||||
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
|
||||||
return NOT_A_PROBABILITY;
|
return NOT_A_PROBABILITY;
|
||||||
}
|
}
|
||||||
if (prevWordsInfo) {
|
if (prevWordsPtNodePos) {
|
||||||
BinaryDictionaryBigramsIterator bigramsIt =
|
BinaryDictionaryBigramsIterator bigramsIt =
|
||||||
prevWordsInfo->getBigramsIteratorForPrediction(this /* dictStructurePolicy */);
|
getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
if (bigramsIt.getBigramPos() == ptNodePos
|
if (bigramsIt.getBigramPos() == ptNodePos
|
||||||
|
@ -146,10 +146,12 @@ int Ver4PatriciaTriePolicy::getProbabilityOfPtNode(const PrevWordsInfo *const pr
|
||||||
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
return getProbability(ptNodeParams.getProbability(), NOT_A_PROBABILITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Ver4PatriciaTriePolicy::iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
void Ver4PatriciaTriePolicy::iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const {
|
NgramListener *const listener) const {
|
||||||
BinaryDictionaryBigramsIterator bigramsIt = prevWordsInfo->getBigramsIteratorForPrediction(
|
if (!prevWordsPtNodePos) {
|
||||||
this /* dictStructurePolicy */);
|
return;
|
||||||
|
}
|
||||||
|
BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorOfPtNode(prevWordsPtNodePos[0]);
|
||||||
while (bigramsIt.hasNext()) {
|
while (bigramsIt.hasNext()) {
|
||||||
bigramsIt.next();
|
bigramsIt.next();
|
||||||
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
listener->onVisitEntry(bigramsIt.getProbability(), bigramsIt.getBigramPos());
|
||||||
|
|
|
@ -72,9 +72,9 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
|
||||||
|
|
||||||
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
int getProbability(const int unigramProbability, const int bigramProbability) const;
|
||||||
|
|
||||||
int getProbabilityOfPtNode(const PrevWordsInfo *const prevWordsInfo, const int ptNodePos) const;
|
int getProbabilityOfPtNode(const int *const prevWordsPtNodePos, const int ptNodePos) const;
|
||||||
|
|
||||||
void iterateNgramEntries(const PrevWordsInfo *const prevWordsInfo,
|
void iterateNgramEntries(const int *const prevWordsPtNodePos,
|
||||||
NgramListener *const listener) const;
|
NgramListener *const listener) const;
|
||||||
|
|
||||||
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
int getShortcutPositionOfPtNode(const int ptNodePos) const;
|
||||||
|
|
Loading…
Reference in a new issue