am d7bef2be: Merge "Support multiple previous words in DicNode."
* commit 'd7bef2bee16b6e529d55b505764a79821fe3c825': Support multiple previous words in DicNode.
This commit is contained in:
commit
2e7c28483d
6 changed files with 36 additions and 27 deletions
|
@ -103,10 +103,10 @@ class DicNode {
|
|||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
}
|
||||
|
||||
// Init for root with prevWordPtNodePos which is used for bigram
|
||||
void initAsRoot(const int rootPtNodeArrayPos, const int prevWordPtNodePos) {
|
||||
// Init for root with prevWordsPtNodePos which is used for n-gram
|
||||
void initAsRoot(const int rootPtNodeArrayPos, const int *const prevWordsPtNodePos) {
|
||||
mIsCachedForNextSuggestion = false;
|
||||
mDicNodeProperties.init(rootPtNodeArrayPos, prevWordPtNodePos);
|
||||
mDicNodeProperties.init(rootPtNodeArrayPos, prevWordsPtNodePos);
|
||||
mDicNodeState.init();
|
||||
PROF_NODE_RESET(mProfiler);
|
||||
}
|
||||
|
@ -114,7 +114,12 @@ class DicNode {
|
|||
// Init for root with previous word
|
||||
void initAsRootWithPreviousWord(const DicNode *const dicNode, const int rootPtNodeArrayPos) {
|
||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||
mDicNodeProperties.init(rootPtNodeArrayPos, dicNode->mDicNodeProperties.getPtNodePos());
|
||||
int newPrevWordsPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
newPrevWordsPtNodePos[0] = dicNode->mDicNodeProperties.getPtNodePos();
|
||||
for (size_t i = 1; i < NELEMS(newPrevWordsPtNodePos); ++i) {
|
||||
newPrevWordsPtNodePos[i] = dicNode->getNthPrevWordTerminalPtNodePos(i);
|
||||
}
|
||||
mDicNodeProperties.init(rootPtNodeArrayPos, newPrevWordsPtNodePos);
|
||||
mDicNodeState.initAsRootWithPreviousWord(&dicNode->mDicNodeState,
|
||||
dicNode->mDicNodeProperties.getDepth());
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
|
@ -140,7 +145,7 @@ class DicNode {
|
|||
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
||||
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
|
||||
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
|
||||
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordTerminalPtNodePos());
|
||||
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos());
|
||||
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
||||
mergedNodeCodePoints);
|
||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||
|
@ -198,14 +203,17 @@ class DicNode {
|
|||
return mDicNodeState.mDicNodeStateInput.getInputIndex(0) < inputSize - 1;
|
||||
}
|
||||
|
||||
// Used to get bigram probability in DicNodeUtils
|
||||
// Used to get n-gram probability in DicNodeUtils
|
||||
int getPtNodePos() const {
|
||||
return mDicNodeProperties.getPtNodePos();
|
||||
}
|
||||
|
||||
// Used to get bigram probability in DicNodeUtils
|
||||
int getPrevWordTerminalPtNodePos() const {
|
||||
return mDicNodeProperties.getPrevWordTerminalPtNodePos();
|
||||
// Used to get n-gram probability in DicNodeUtils
|
||||
int getNthPrevWordTerminalPtNodePos(const int n) const {
|
||||
if (n <= 0 || n > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
|
||||
return NOT_A_DICT_POS;
|
||||
}
|
||||
return mDicNodeProperties.getPrevWordsTerminalPtNodePos()[n - 1];
|
||||
}
|
||||
|
||||
// Used in DicNodeUtils
|
||||
|
|
|
@ -29,8 +29,8 @@ namespace latinime {
|
|||
|
||||
/* static */ void DicNodeUtils::initAsRoot(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const int prevWordPtNodePos, DicNode *const newRootDicNode) {
|
||||
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordPtNodePos);
|
||||
const int *const prevWordsPtNodePos, DicNode *const newRootDicNode) {
|
||||
newRootDicNode->initAsRoot(dictionaryStructurePolicy->getRootPosition(), prevWordsPtNodePos);
|
||||
}
|
||||
|
||||
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
||||
|
@ -86,7 +86,7 @@ namespace latinime {
|
|||
const DicNode *const dicNode, MultiBigramMap *const multiBigramMap) {
|
||||
const int unigramProbability = dicNode->getProbability();
|
||||
const int ptNodePos = dicNode->getPtNodePos();
|
||||
const int prevWordTerminalPtNodePos = dicNode->getPrevWordTerminalPtNodePos();
|
||||
const int prevWordTerminalPtNodePos = dicNode->getNthPrevWordTerminalPtNodePos(1 /* n */);
|
||||
if (NOT_A_DICT_POS == ptNodePos || NOT_A_DICT_POS == prevWordTerminalPtNodePos) {
|
||||
// Note: Normally wordPos comes from the dictionary and should never equal
|
||||
// NOT_A_VALID_WORD_POS.
|
||||
|
|
|
@ -30,7 +30,7 @@ class DicNodeUtils {
|
|||
public:
|
||||
static void initAsRoot(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const int prevWordPtNodePos, DicNode *const newRootDicNode);
|
||||
const int *const prevWordPtNodePos, DicNode *const newRootDicNode);
|
||||
static void initAsRootWithPreviousWord(
|
||||
const DictionaryStructureWithBufferPolicy *const dictionaryStructurePolicy,
|
||||
const DicNode *const prevWordLastDicNode, DicNode *const newRootDicNode);
|
||||
|
|
|
@ -32,15 +32,14 @@ class DicNodeProperties {
|
|||
: mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
|
||||
mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
|
||||
mIsTerminal(false), mHasChildrenPtNodes(false),
|
||||
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0),
|
||||
mPrevWordTerminalPtNodePos(NOT_A_DICT_POS) {}
|
||||
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
|
||||
|
||||
~DicNodeProperties() {}
|
||||
|
||||
// Should be called only once per DicNode initialization.
|
||||
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
|
||||
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
|
||||
const uint16_t depth, const uint16_t leavingDepth, const int prevWordNodePos) {
|
||||
const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) {
|
||||
mPtNodePos = pos;
|
||||
mChildrenPtNodeArrayPos = childrenPos;
|
||||
mDicNodeCodePoint = nodeCodePoint;
|
||||
|
@ -50,11 +49,11 @@ class DicNodeProperties {
|
|||
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
|
||||
mDepth = depth;
|
||||
mLeavingDepth = leavingDepth;
|
||||
mPrevWordTerminalPtNodePos = prevWordNodePos;
|
||||
memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
|
||||
}
|
||||
|
||||
// Init for root with prevWordPtNodePos which is used for bigram
|
||||
void init(const int rootPtNodeArrayPos, const int prevWordNodePos) {
|
||||
// Init for root with prevWordsPtNodePos which is used for n-gram
|
||||
void init(const int rootPtNodeArrayPos, const int *const prevWordsNodePos) {
|
||||
mPtNodePos = NOT_A_DICT_POS;
|
||||
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
|
||||
mDicNodeCodePoint = NOT_A_CODE_POINT;
|
||||
|
@ -64,7 +63,7 @@ class DicNodeProperties {
|
|||
mIsBlacklistedOrNotAWord = false;
|
||||
mDepth = 0;
|
||||
mLeavingDepth = 0;
|
||||
mPrevWordTerminalPtNodePos = prevWordNodePos;
|
||||
memmove(mPrevWordsTerminalPtNodePos, prevWordsNodePos, sizeof(mPrevWordsTerminalPtNodePos));
|
||||
}
|
||||
|
||||
void initByCopy(const DicNodeProperties *const dicNodeProp) {
|
||||
|
@ -77,7 +76,8 @@ class DicNodeProperties {
|
|||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth;
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
|
||||
memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
|
||||
sizeof(mPrevWordsTerminalPtNodePos));
|
||||
}
|
||||
|
||||
// Init as passing child
|
||||
|
@ -91,7 +91,8 @@ class DicNodeProperties {
|
|||
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
|
||||
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||
mPrevWordTerminalPtNodePos = dicNodeProp->mPrevWordTerminalPtNodePos;
|
||||
memmove(mPrevWordsTerminalPtNodePos, dicNodeProp->mPrevWordsTerminalPtNodePos,
|
||||
sizeof(mPrevWordsTerminalPtNodePos));
|
||||
}
|
||||
|
||||
int getPtNodePos() const {
|
||||
|
@ -131,8 +132,8 @@ class DicNodeProperties {
|
|||
return mIsBlacklistedOrNotAWord;
|
||||
}
|
||||
|
||||
int getPrevWordTerminalPtNodePos() const {
|
||||
return mPrevWordTerminalPtNodePos;
|
||||
const int *getPrevWordsTerminalPtNodePos() const {
|
||||
return mPrevWordsTerminalPtNodePos;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -148,7 +149,7 @@ class DicNodeProperties {
|
|||
bool mIsBlacklistedOrNotAWord;
|
||||
uint16_t mDepth;
|
||||
uint16_t mLeavingDepth;
|
||||
int mPrevWordTerminalPtNodePos;
|
||||
int mPrevWordsTerminalPtNodePos[MAX_PREV_WORD_COUNT_FOR_N_GRAM];
|
||||
};
|
||||
} // namespace latinime
|
||||
#endif // LATINIME_DIC_NODE_PROPERTIES_H
|
||||
|
|
|
@ -79,7 +79,7 @@ class DicTraverseSession {
|
|||
//--------------------
|
||||
const ProximityInfo *getProximityInfo() const { return mProximityInfo; }
|
||||
const SuggestOptions *getSuggestOptions() const { return mSuggestOptions; }
|
||||
int getPrevWordPtNodePos() const { return mPrevWordsPtNodePos[0]; }
|
||||
const int *getPrevWordsPtNodePos() const { return mPrevWordsPtNodePos; }
|
||||
DicNodesCache *getDicTraverseCache() { return &mDicNodesCache; }
|
||||
MultiBigramMap *getMultiBigramMap() { return &mMultiBigramMap; }
|
||||
const ProximityInfoState *getProximityInfoState(int id) const {
|
||||
|
|
|
@ -92,7 +92,7 @@ void Suggest::initializeSearch(DicTraverseSession *traverseSession) const {
|
|||
// Create a new dic node here
|
||||
DicNode rootNode;
|
||||
DicNodeUtils::initAsRoot(traverseSession->getDictionaryStructurePolicy(),
|
||||
traverseSession->getPrevWordPtNodePos(), &rootNode);
|
||||
traverseSession->getPrevWordsPtNodePos(), &rootNode);
|
||||
traverseSession->getDicTraverseCache()->copyPushActive(&rootNode);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue