Merge "Get bigrams iterator via dict structure policy."

main
Keisuke Kuroyanagi 2014-07-08 07:26:43 +00:00 committed by Android (Google) Code Review
commit 42a64e24b8
10 changed files with 56 additions and 45 deletions

View File

@ -24,6 +24,11 @@ namespace latinime {
class BinaryDictionaryBigramsIterator { class BinaryDictionaryBigramsIterator {
public: public:
// Empty iterator.
BinaryDictionaryBigramsIterator()
: mBigramsStructurePolicy(nullptr), mPos(NOT_A_DICT_POS),
mBigramPos(NOT_A_DICT_POS), mProbability(NOT_A_PROBABILITY), mHasNext(false) {}
BinaryDictionaryBigramsIterator( BinaryDictionaryBigramsIterator(
const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos) const DictionaryBigramsStructurePolicy *const bigramsStructurePolicy, const int pos)
: mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos), : mBigramsStructurePolicy(bigramsStructurePolicy), mPos(pos),

View File

@ -53,9 +53,8 @@ int MultiBigramMap::getBigramProbability(
void MultiBigramMap::BigramMap::init( void MultiBigramMap::BigramMap::init(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) { const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos) {
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); BinaryDictionaryBigramsIterator bigramsIt =
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), structurePolicy->getBigramsIteratorOfPtNode(nodePos);
bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) { if (bigramsIt.getBigramPos() == NOT_A_DICT_POS) {
@ -89,9 +88,8 @@ int MultiBigramMap::readBigramProbabilityFromBinaryDictionary(
const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos, const DictionaryStructureWithBufferPolicy *const structurePolicy, const int nodePos,
const int nextWordPosition, const int unigramProbability) { const int nextWordPosition, const int unigramProbability) {
int bigramProbability = NOT_A_PROBABILITY; int bigramProbability = NOT_A_PROBABILITY;
const int bigramsListPos = structurePolicy->getBigramsPositionOfPtNode(nodePos); BinaryDictionaryBigramsIterator bigramsIt =
BinaryDictionaryBigramsIterator bigramsIt(structurePolicy->getBigramsStructurePolicy(), structurePolicy->getBigramsIteratorOfPtNode(nodePos);
bigramsListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
bigramsIt.next(); bigramsIt.next();
if (bigramsIt.getBigramPos() == nextWordPosition) { if (bigramsIt.getBigramPos() == nextWordPosition) {

View File

@ -20,6 +20,7 @@
#include <memory> #include <memory>
#include "defines.h" #include "defines.h"
#include "suggest/core/dictionary/binary_dictionary_bigrams_iterator.h"
#include "suggest/core/dictionary/property/word_property.h" #include "suggest/core/dictionary/property/word_property.h"
namespace latinime { namespace latinime {
@ -61,12 +62,10 @@ class DictionaryStructureWithBufferPolicy {
virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0; virtual int getShortcutPositionOfPtNode(const int nodePos) const = 0;
virtual int getBigramsPositionOfPtNode(const int nodePos) const = 0; virtual BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int nodePos) const = 0;
virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0; virtual const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const = 0;
virtual const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const = 0;
virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0; virtual const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const = 0;
// Returns whether the update was success or not. // Returns whether the update was success or not.

View File

@ -92,11 +92,9 @@ class PrevWordsInfo {
BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction( BinaryDictionaryBigramsIterator getBigramsIteratorForPrediction(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const { const DictionaryStructureWithBufferPolicy *const dictStructurePolicy) const {
const int bigramListPos = getBigramListPositionForWordWithTryingLowerCaseSearch( return getBigramsIteratorForWordWithTryingLowerCaseSearch(
dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0], dictStructurePolicy, mPrevWordCodePoints[0], mPrevWordCodePointCount[0],
mIsBeginningOfSentence[0]); mIsBeginningOfSentence[0]);
return BinaryDictionaryBigramsIterator(dictStructurePolicy->getBigramsStructurePolicy(),
bigramListPos);
} }
// n is 1-indexed. // n is 1-indexed.
@ -156,12 +154,12 @@ class PrevWordsInfo {
codePoints, codePointCount, true /* forceLowerCaseSearch */); codePoints, codePointCount, true /* forceLowerCaseSearch */);
} }
static int getBigramListPositionForWordWithTryingLowerCaseSearch( static BinaryDictionaryBigramsIterator getBigramsIteratorForWordWithTryingLowerCaseSearch(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *const wordCodePoints, const int wordCodePointCount, const int *const wordCodePoints, const int wordCodePointCount,
const bool isBeginningOfSentence) { const bool isBeginningOfSentence) {
if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) { if (!dictStructurePolicy || !wordCodePoints || wordCodePointCount > MAX_WORD_LENGTH) {
return NOT_A_DICT_POS; return BinaryDictionaryBigramsIterator();
} }
int codePoints[MAX_WORD_LENGTH]; int codePoints[MAX_WORD_LENGTH];
int codePointCount = wordCodePointCount; int codePointCount = wordCodePointCount;
@ -170,30 +168,30 @@ class PrevWordsInfo {
codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints, codePointCount = CharUtils::attachBeginningOfSentenceMarker(codePoints,
codePointCount, MAX_WORD_LENGTH); codePointCount, MAX_WORD_LENGTH);
if (codePointCount <= 0) { if (codePointCount <= 0) {
return NOT_A_DICT_POS; return BinaryDictionaryBigramsIterator();
} }
} }
int pos = getBigramListPositionForWord(dictStructurePolicy, codePoints, BinaryDictionaryBigramsIterator bigramsIt = getBigramsIteratorForWord(dictStructurePolicy,
codePointCount, false /* forceLowerCaseSearch */); codePoints, codePointCount, false /* forceLowerCaseSearch */);
// getBigramListPositionForWord returns NOT_A_DICT_POS if this word isn't in the // getBigramsIteratorForWord returns an empty iterator if this word isn't in the dictionary
// dictionary or has no bigrams // or has no bigrams.
if (NOT_A_DICT_POS == pos) { if (bigramsIt.hasNext()) {
// If no bigrams for this exact word, search again in lower case. return bigramsIt;
pos = getBigramListPositionForWord(dictStructurePolicy, codePoints,
codePointCount, true /* forceLowerCaseSearch */);
} }
return pos; // If no bigrams for this exact word, search again in lower case.
return getBigramsIteratorForWord(dictStructurePolicy, codePoints,
codePointCount, true /* forceLowerCaseSearch */);
} }
static int getBigramListPositionForWord( static BinaryDictionaryBigramsIterator getBigramsIteratorForWord(
const DictionaryStructureWithBufferPolicy *const dictStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictStructurePolicy,
const int *wordCodePoints, const int wordCodePointCount, const int *wordCodePoints, const int wordCodePointCount,
const bool forceLowerCaseSearch) { const bool forceLowerCaseSearch) {
if (!wordCodePoints || wordCodePointCount <= 0) return NOT_A_DICT_POS; if (!wordCodePoints || wordCodePointCount <= 0) return BinaryDictionaryBigramsIterator();
const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord( const int terminalPtNodePos = dictStructurePolicy->getTerminalPtNodePositionOfWord(
wordCodePoints, wordCodePointCount, forceLowerCaseSearch); wordCodePoints, wordCodePointCount, forceLowerCaseSearch);
if (NOT_A_DICT_POS == terminalPtNodePos) return NOT_A_DICT_POS; if (NOT_A_DICT_POS == terminalPtNodePos) return BinaryDictionaryBigramsIterator();
return dictStructurePolicy->getBigramsPositionOfPtNode(terminalPtNodePos); return dictStructurePolicy->getBigramsIteratorOfPtNode(terminalPtNodePos);
} }
void clear() { void clear() {

View File

@ -154,6 +154,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
} }
BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
const int ptNodePos) const {
const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
}
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;

View File

@ -94,16 +94,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const; BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy; return mHeaderPolicy;
} }
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramPolicy;
}
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return &mShortcutPolicy; return &mShortcutPolicy;
} }
@ -167,6 +163,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount; int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords; std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted; mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
}; };
} // namespace v402 } // namespace v402
} // namespace backward } // namespace backward

View File

@ -304,6 +304,12 @@ int PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) const {
return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos(); return mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos).getShortcutPos();
} }
BinaryDictionaryBigramsIterator PatriciaTriePolicy::getBigramsIteratorOfPtNode(
const int ptNodePos) const {
const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
return BinaryDictionaryBigramsIterator(&mBigramListPolicy, bigramsPosition);
}
int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { int PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;
@ -322,7 +328,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
int bigramPos = NOT_A_DICT_POS; int bigramPos = NOT_A_DICT_POS;
int siblingPos = NOT_A_DICT_POS; int siblingPos = NOT_A_DICT_POS;
PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(), PatriciaTrieReadingUtils::readPtNodeInfo(mDictRoot, ptNodePos, getShortcutsStructurePolicy(),
getBigramsStructurePolicy(), &flags, &mergedNodeCodePointCount, mergedNodeCodePoints, &mBigramListPolicy, &flags, &mergedNodeCodePointCount, mergedNodeCodePoints,
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos); &probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
// Skip PtNodes don't start with Unicode code point because they represent non-word information. // Skip PtNodes don't start with Unicode code point because they represent non-word information.
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) { if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
@ -352,7 +358,7 @@ const WordProperty PatriciaTriePolicy::getWordProperty(const int *const codePoin
std::vector<BigramProperty> bigrams; std::vector<BigramProperty> bigrams;
const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos); const int bigramListPos = getBigramsPositionOfPtNode(ptNodePos);
int bigramWord1CodePoints[MAX_WORD_LENGTH]; int bigramWord1CodePoints[MAX_WORD_LENGTH];
BinaryDictionaryBigramsIterator bigramsIt(getBigramsStructurePolicy(), bigramListPos); BinaryDictionaryBigramsIterator bigramsIt(&mBigramListPolicy, bigramListPos);
while (bigramsIt.hasNext()) { while (bigramsIt.hasNext()) {
// Fetch the next bigram information and forward the iterator. // Fetch the next bigram information and forward the iterator.
bigramsIt.next(); bigramsIt.next();

View File

@ -67,16 +67,12 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const; BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return &mHeaderPolicy; return &mHeaderPolicy;
} }
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramListPolicy;
}
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return &mShortcutListPolicy; return &mShortcutListPolicy;
} }
@ -158,6 +154,7 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
std::vector<int> mTerminalPtNodePositionsForIteratingWords; std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted; mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos, int createAndGetLeavingChildNode(const DicNode *const dicNode, const int ptNodePos,
DicNodeVector *const childDicNodes) const; DicNodeVector *const childDicNodes) const;
}; };

View File

@ -144,6 +144,12 @@ int Ver4PatriciaTriePolicy::getShortcutPositionOfPtNode(const int ptNodePos) con
ptNodeParams.getTerminalId()); ptNodeParams.getTerminalId());
} }
BinaryDictionaryBigramsIterator Ver4PatriciaTriePolicy::getBigramsIteratorOfPtNode(
const int ptNodePos) const {
const int bigramsPosition = getBigramsPositionOfPtNode(ptNodePos);
return BinaryDictionaryBigramsIterator(&mBigramPolicy, bigramsPosition);
}
int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const { int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) const {
if (ptNodePos == NOT_A_DICT_POS) { if (ptNodePos == NOT_A_DICT_POS) {
return NOT_A_DICT_POS; return NOT_A_DICT_POS;

View File

@ -76,16 +76,12 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int getShortcutPositionOfPtNode(const int ptNodePos) const; int getShortcutPositionOfPtNode(const int ptNodePos) const;
int getBigramsPositionOfPtNode(const int ptNodePos) const; BinaryDictionaryBigramsIterator getBigramsIteratorOfPtNode(const int ptNodePos) const;
const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const { const DictionaryHeaderStructurePolicy *getHeaderStructurePolicy() const {
return mHeaderPolicy; return mHeaderPolicy;
} }
const DictionaryBigramsStructurePolicy *getBigramsStructurePolicy() const {
return &mBigramPolicy;
}
const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const { const DictionaryShortcutsStructurePolicy *getShortcutsStructurePolicy() const {
return &mShortcutPolicy; return &mShortcutPolicy;
} }
@ -146,6 +142,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
int mBigramCount; int mBigramCount;
std::vector<int> mTerminalPtNodePositionsForIteratingWords; std::vector<int> mTerminalPtNodePositionsForIteratingWords;
mutable bool mIsCorrupted; mutable bool mIsCorrupted;
int getBigramsPositionOfPtNode(const int ptNodePos) const;
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H #endif // LATINIME_VER4_PATRICIA_TRIE_POLICY_H