Support multiple previous words in DicNode.

Bug: 14425059
Change-Id: Ib8682befe4d7d9fe5122eb538e7c804f75ded463
This commit is contained in:
Keisuke Kuroyanagi 2014-05-19 11:47:10 +09:00
parent 45d1a936a7
commit fa7db65dec
6 changed files with 36 additions and 27 deletions

View file

@ -103,10 +103,10 @@ class DicNode {
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
// Init for root with prevWordPtNodePos which is used for bigram
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
// Init for root with prevWordsPtNodePos which is used for n-gram
void initAsRoot(const int rootPtNodeArrayPos, const int *const prevWordsPtNodePos) {
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos);
mDicNodeProperties.init(rootPtNodeArrayPos, prevWordsPtNodePos);
mDicNodeState.init();
PROF_NODE_RESET(mProfiler);
}
@ -114,7 +114,12 @@ class DicNode {
// Init for root with previous word
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos());
int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
}
mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
dicNode->mDicNodeProperties.getDepth());
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@ -140,7 +145,7 @@ class DicNode {
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos());
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
@ -198,14 +203,17 @@ class DicNode {
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
}
// Used to get bigram probability in DicNodeUtils
// Used to get n-gram probability in DicNodeUtils
// Forwards the PtNode position stored in this node's DicNodeProperties.
int getPtNodePos() const {
    const int ptNodePos = mDicNodeProperties.getPtNodePos();
    return ptNodePos;
}
// Used to get bigram probability in DicNodeUtils
int getPrevWordTerminalPtNodePos() const {
return mDicNodeProperties.getPrevWordTerminalPtNodePos();
// Used to get n-gram probability in DicNodeUtils
// Looks up the terminal PtNode position recorded for the n-th previous word.
// n is 1-based: n == 1 reads element 0 of the array exposed by
// DicNodeProperties::getPrevWordsTerminalPtNodePos(). Any n outside
// [1, MAX_PREV_WORD_COUNT_FOR_N_GRAM] yields NOT_A_DICT_POS instead of
// touching the array.
int getNthPrevWordTerminalPtNodePos(const int n) const {
    const bool isInRange = (1 <= n) && (n <= MAX_PREV_WORD_COUNT_FOR_N_GRAM);
    return isInRange
            ? mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1]
            : NOT_A_DICT_POS;
}
// Used in DicNodeUtils

View file

@ -29,8 +29,8 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordPtNodePos, DicNode *const newRootDicNode) {
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
const int *const prevWordsPtNodePos, DicNode *const newRootDicNode) {
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordsPtNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
@ -86,7 +86,7 @@ namespace latinime {
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
const int unigramProbability = dicNode->getProbability();
const int ptNodePos = dicNode->getPtNodePos();
const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
// Note: Normally wordPos comes from the dictionary and should never equal
// NOT_A_VALID_WORD_POS.

View file

@ -30,7 +30,7 @@ class DicNodeUtils {
public:
static void initAsRoot(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const int prevWordPtNodePos, DicNode *const newRootDicNode);
const int *const prevWordPtNodePos, DicNode *const newRootDicNode);
static void initAsRootWithPreviousWord(
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);

View file

@ -32,15 +32,14 @@ class DicNodeProperties {
: mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
mIsTerminal(false), mHasChildrenPtNodes(false),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0),
mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {}
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
~DicNodeProperties() {}
// Should be called only once each time a DicNode is initialized.
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) {
const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) {
mPtNodePos = pos;
mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint;
@ -50,11 +49,11 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
mLeavingDepth = leavingDepth;
mPrevWordTerminalPtNodePos = prevWordNodePos;
memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
}
// Init for root with prevWordPtNodePos which is used for bigram
void init(const int rootPtNodeArrayPos, const int prevWordNodePos) {
// Init for root with prevWordsPtNodePos which is used for n-gram
void init(const int rootPtNodeArrayPos, const int *const prevWordsNodePos) {
mPtNodePos = NOT_A_DICT_POS;
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
mDicNodeCodePoint = NOT_A_CODE_POINT;
@ -64,7 +63,7 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = false;
mDepth = 0;
mLeavingDepth = 0;
mPrevWordTerminalPtNodePos = prevWordNodePos;
memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
}
void initByCopy(const DicNodeProperties *const dicNodeProp) {
@ -77,7 +76,8 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth;
mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
sizeof(mPrevWordsTerminalPtNodePos));
}
// Init as passing child
@ -91,7 +91,8 @@ class DicNodeProperties {
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = dicNodeProp->mLeavingDepth;
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
sizeof(mPrevWordsTerminalPtNodePos));
}
int getPtNodePos() const {
@ -131,8 +132,8 @@ class DicNodeProperties {
return mIsBlacklistedOrNotAWord;
}
int getPrevWordTerminalPtNodePos() const {
return mPrevWordTerminalPtNodePos;
// Read-only pointer to the first element of mPrevWordsTerminalPtNodePos,
// which is declared with MAX_PREV_WORD_COUNT_FOR_N_GRAM elements.
const int *getPrevWordsTerminalPtNodePos() const {
    return &mPrevWordsTerminalPtNodePos[0];
}
private:
@ -148,7 +149,7 @@ class DicNodeProperties {
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;
uint16_t mLeavingDepth;
int mPrevWordTerminalPtNodePos;
int mPrevWordsTerminalPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_PROPERTIES_H

View file

@ -79,7 +79,7 @@ class DicTraverseSession {
//--------------------
// Returns the session's ProximityInfo pointer (mProximityInfo) as read-only.
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
// Returns the session's SuggestOptions pointer (mSuggestOptions) as read-only.
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
// Returns only element 0 of mPrevWordsPtNodePos.
int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; }
// Returns a read-only pointer to the start of mPrevWordsPtNodePos, so callers
// can read more than the first entry.
const int *getPrevWordsPtNodePos() const { return mPrevWordsPtNodePos; }
// Returns the address of the mDicNodesCache member; non-const, so callers may mutate it.
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
// Returns the address of the mMultiBigramMap member; non-const, so callers may mutate it.
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
const ProximityInfoState *getProximityInfoState(int id) const {

View file

@ -92,7 +92,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
// Create a new dic node here
DicNode rootNode;
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
traverseSession->getPrevWordPtNodePos(), &rootNode);
traverseSession->getPrevWordsPtNodePos(), &rootNode);
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
}
}