Support multiple previous words in DicNode.

Bug: 14425059
Change-Id: Ib8682befe4d7d9fe5122eb538e7c804f75ded463
This commit is contained in:
Keisuke Kuroyanagi 2014-05-19 11:47:10 +09:00
parent 45d1a936a7
commit fa7db65dec
6 changed files with 36 additions and 27 deletions

View file

@ -103,10 +103,10 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
} }
// Init for root with prevWordPtNodePos which is used for bigram // Init for root with prevWordsPtNodePos which is used for n-gram
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) { void initAsRoot(const int rootPtNodeArrayPos, const int *const prevWordsPtNodePos) {
mIsCachedForNextSuggestion = false; mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos); mDicNodeProperties.init(rootPtNodeArrayPos, prevWordsPtNodePos);
mDicNodeState.init(); mDicNodeState.init();
PROF_NODE_RESET(mProfiler); PROF_NODE_RESET(mProfiler);
} }
@ -114,7 +114,12 @@ class DicNode {
// Init for root with previous word // Init for root with previous word
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) { void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion; mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos()); int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
}
mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState, mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
dicNode->mDicNodeProperties.getDepth()); dicNode->mDicNodeProperties.getDepth());
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@ -140,7 +145,7 @@ class DicNode {
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount); dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0], mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth, probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos()); newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount, mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints); mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler); PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@ -198,14 +203,17 @@ class DicNode {
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1; return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
} }
// Used to get bigram probability in DicNodeUtils // Used to get n-gram probability in DicNodeUtils
int getPtNodePos() const { int getPtNodePos() const {
return mDicNodeProperties.getPtNodePos(); return mDicNodeProperties.getPtNodePos();
} }
// Used to get bigram probability in DicNodeUtils // Used to get n-gram probability in DicNodeUtils
int getPrevWordTerminalPtNodePos() const { int getNthPrevWordTerminalPtNodePos(const int n) const {
return mDicNodeProperties.getPrevWordTerminalPtNodePos(); if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
return NOT_A_DICT_POS;
}
return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1];
} }
// Used in DicNodeUtils // Used in DicNodeUtils

View file

@ -29,8 +29,8 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot( /* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordPtNodePos, DicNode *const newRootDicNode) { const int *const prevWordsPtNodePos, DicNode *const newRootDicNode) {
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos); newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordsPtNodePos);
} }
/*static */ void DicNodeUtils::initAsRootWithPreviousWord( /*static */ void DicNodeUtils::initAsRootWithPreviousWord(
@ -86,7 +86,7 @@ namespace latinime {
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) { const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
const int unigramProbability = dicNode->getProbability(); const int unigramProbability = dicNode->getProbability();
const int ptNodePos = dicNode->getPtNodePos(); const int ptNodePos = dicNode->getPtNodePos();
const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos(); const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) { if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally wordPos comes from the dictionary and should never equal // Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS. // NOT_A_VALID_WORD_POS.

View file

@ -30,7 +30,7 @@ class DicNodeUtils {
public: public:
static void initAsRoot( static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordPtNodePos, DicNode *const newRootDicNode); const int *const prevWordPtNodePos, DicNode *const newRootDicNode);
static void initAsRootWithPreviousWord( static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy, const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode); const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);

View file

@ -32,15 +32,14 @@ class DicNodeProperties {
: mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS), : mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT), mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
mIsTerminal(false), mHasChildrenPtNodes(false), mIsTerminal(false), mHasChildrenPtNodes(false),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0), mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {}
~DicNodeProperties() {} ~DicNodeProperties() {}
// Should be called only once per DicNode is initialized. // Should be called only once per DicNode is initialized.
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability, void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord, const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) { const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) {
mPtNodePos = pos; mPtNodePos = pos;
mChildrenPtNodeArrayPos = childrenPos; mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint; mDicNodeCodePoint = nodeCodePoint;
@ -50,11 +49,11 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord; mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth; mDepth = depth;
mLeavingDepth = leavingDepth; mLeavingDepth = leavingDepth;
mPrevWordTerminalPtNodePos = prevWordNodePos; memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
} }
// Init for root with prevWordPtNodePos which is used for bigram // Init for root with prevWordsPtNodePos which is used for n-gram
void init(const int rootPtNodeArrayPos, const int prevWordNodePos) { void init(const int rootPtNodeArrayPos, const int *const prevWordsNodePos) {
mPtNodePos = NOT_A_DICT_POS; mPtNodePos = NOT_A_DICT_POS;
mChildrenPtNodeArrayPos = rootPtNodeArrayPos; mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
mDicNodeCodePoint = NOT_A_CODE_POINT; mDicNodeCodePoint = NOT_A_CODE_POINT;
@ -64,7 +63,7 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = false; mIsBlacklistedOrNotAWord = false;
mDepth = 0; mDepth = 0;
mLeavingDepth = 0; mLeavingDepth = 0;
mPrevWordTerminalPtNodePos = prevWordNodePos; memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
} }
void initByCopy(const DicNodeProperties *const dicNodeProp) { void initByCopy(const DicNodeProperties *const dicNodeProp) {
@ -77,7 +76,8 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth; mDepth = dicNodeProp->mDepth;
mLeavingDepth = dicNodeProp->mLeavingDepth; mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos; memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
sizeof(mPrevWordsTerminalPtNodePos));
} }
// Init as passing child // Init as passing child
@ -91,7 +91,8 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord; mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = dicNodeProp->mLeavingDepth; mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos; memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
sizeof(mPrevWordsTerminalPtNodePos));
} }
int getPtNodePos() const { int getPtNodePos() const {
@ -131,8 +132,8 @@ class DicNodeProperties {
return mIsBlacklistedOrNotAWord; return mIsBlacklistedOrNotAWord;
} }
int getPrevWordTerminalPtNodePos() const { const int *getPrevWordsTerminalPtNodePos() const {
return mPrevWordTerminalPtNodePos; return mPrevWordsTerminalPtNodePos;
} }
private: private:
@ -148,7 +149,7 @@ class DicNodeProperties {
bool mIsBlacklistedOrNotAWord; bool mIsBlacklistedOrNotAWord;
uint16_t mDepth; uint16_t mDepth;
uint16_t mLeavingDepth; uint16_t mLeavingDepth;
int mPrevWordTerminalPtNodePos; int mPrevWordsTerminalPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
}; };
} // namespace latinime } // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H #endif // LATINIME_DIC_NODE_PROPERTIES_H

View file

@ -79,7 +79,7 @@ class DicTraverseSession {
//-------------------- //--------------------
const ProximityInfo *getProximityInfo() const { return mProximityInfo; } const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; } const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; } const int *getPrevWordsPtNodePos() const { return mPrevWordsPtNodePos; }
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; } DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; } MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const { const ProximityInfoState *getProximityInfoState(int id) const {

View file

@ -92,7 +92,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
// Create a new dic node here // Create a new dic node here
DicNode rootNode; DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(), DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
traverseSession->getPrevWordPtNodePos(), &rootNode); traverseSession->getPrevWordsPtNodePos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode); traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
} }
} }