Merge "Reduce the number of arguments required to initialize dic_node."
commit
c96b56a5ec
|
@ -109,12 +109,14 @@ class DicNode {
|
||||||
|
|
||||||
// TODO: minimize arguments by looking binary_format
|
// TODO: minimize arguments by looking binary_format
|
||||||
// Init for root with prevWordNodePos which is used for bigram
|
// Init for root with prevWordNodePos which is used for bigram
|
||||||
void initAsRoot(const int pos, const int childrenPos, const int childrenCount,
|
void initAsRoot(const int pos, const int childrenPos, const int prevWordNodePos) {
|
||||||
const int prevWordNodePos) {
|
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = false;
|
mIsCachedForNextSuggestion = false;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
|
pos, 0 /* flags */, childrenPos, 0 /* attributesPos */,
|
||||||
|
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
||||||
|
false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
|
||||||
|
0 /* terminalDepth */);
|
||||||
mDicNodeState.init(prevWordNodePos);
|
mDicNodeState.init(prevWordNodePos);
|
||||||
PROF_NODE_RESET(mProfiler);
|
PROF_NODE_RESET(mProfiler);
|
||||||
}
|
}
|
||||||
|
@ -130,12 +132,14 @@ class DicNode {
|
||||||
|
|
||||||
// TODO: minimize arguments by looking binary_format
|
// TODO: minimize arguments by looking binary_format
|
||||||
// Init for root with previous word
|
// Init for root with previous word
|
||||||
void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos,
|
void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos) {
|
||||||
const int childrenCount) {
|
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
mDicNodeProperties.init(
|
mDicNodeProperties.init(
|
||||||
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
|
pos, 0 /* flags */, childrenPos, 0 /* attributesPos */,
|
||||||
|
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
|
||||||
|
false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
|
||||||
|
0 /* terminalDepth */);
|
||||||
// TODO: Move to dicNodeState?
|
// TODO: Move to dicNodeState?
|
||||||
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
|
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
|
||||||
mDicNodeState.mDicNodeStateInput.init(
|
mDicNodeState.mDicNodeStateInput.init(
|
||||||
|
@ -157,19 +161,18 @@ class DicNode {
|
||||||
|
|
||||||
// TODO: minimize arguments by looking binary_format
|
// TODO: minimize arguments by looking binary_format
|
||||||
void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
|
void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
|
||||||
const int attributesPos, const int siblingPos, const int nodeCodePoint,
|
const int attributesPos, const int probability, const bool isTerminal,
|
||||||
const int childrenCount, const int probability, const int bigramProbability,
|
const bool hasChildren, const uint16_t mergedNodeCodePointCount,
|
||||||
const bool isTerminal, const bool hasMultipleChars, const bool hasChildren,
|
const int *const mergedNodeCodePoints) {
|
||||||
const uint16_t additionalSubwordLength, const int *additionalSubword) {
|
|
||||||
mIsUsed = true;
|
mIsUsed = true;
|
||||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||||
dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength);
|
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
|
||||||
mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, siblingPos, nodeCodePoint,
|
mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0],
|
||||||
childrenCount, probability, bigramProbability, isTerminal, hasMultipleChars,
|
probability, isTerminal, hasChildren, newDepth, newLeavingDepth);
|
||||||
hasChildren, newDepth, newLeavingDepth);
|
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
|
||||||
mDicNodeState.init(&dicNode->mDicNodeState, additionalSubwordLength, additionalSubword);
|
mergedNodeCodePoints);
|
||||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -193,8 +196,8 @@ class DicNode {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isLeavingNode() const {
|
bool isLeavingNode() const {
|
||||||
ASSERT(getNodeCodePointCount() <= getLeavingDepth());
|
ASSERT(getNodeCodePointCount() <= mDicNodeProperties.getLeavingDepth());
|
||||||
return getNodeCodePointCount() == getLeavingDepth();
|
return getNodeCodePointCount() == mDicNodeProperties.getLeavingDepth();
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isFirstLetter() const {
|
AK_FORCE_INLINE bool isFirstLetter() const {
|
||||||
|
@ -256,12 +259,6 @@ class DicNode {
|
||||||
return mDicNodeProperties.getChildrenPos();
|
return mDicNodeProperties.getChildrenPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used in DicNodeUtils
|
|
||||||
int getChildrenCount() const {
|
|
||||||
return mDicNodeProperties.getChildrenCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Used in DicNodeUtils
|
|
||||||
int getProbability() const {
|
int getProbability() const {
|
||||||
return mDicNodeProperties.getProbability();
|
return mDicNodeProperties.getProbability();
|
||||||
}
|
}
|
||||||
|
@ -280,10 +277,6 @@ class DicNode {
|
||||||
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
|
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t getLeavingDepth() const {
|
|
||||||
return mDicNodeProperties.getLeavingDepth();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isTotalInputSizeExceedingLimit() const {
|
bool isTotalInputSizeExceedingLimit() const {
|
||||||
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
|
||||||
const int currentWordDepth = getNodeCodePointCount();
|
const int currentWordDepth = getNodeCodePointCount();
|
||||||
|
@ -370,7 +363,7 @@ class DicNode {
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
AK_FORCE_INLINE const int *getOutputWordBuf() const {
|
||||||
return mDicNodeState.mDicNodeStateOutput.mWordBuf;
|
return mDicNodeState.mDicNodeStateOutput.mCodePointsBuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getPrevCodePointG(int pointerId) const {
|
int getPrevCodePointG(int pointerId) const {
|
||||||
|
|
|
@ -27,37 +27,31 @@ namespace latinime {
|
||||||
/**
|
/**
|
||||||
* Node for traversing the lexicon trie.
|
* Node for traversing the lexicon trie.
|
||||||
*/
|
*/
|
||||||
|
// TODO: Introduce a dictionary node class which has attribute members required to understand the
|
||||||
|
// dictionary structure.
|
||||||
class DicNodeProperties {
|
class DicNodeProperties {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE DicNodeProperties()
|
AK_FORCE_INLINE DicNodeProperties()
|
||||||
: mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mSiblingPos(0),
|
: mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
|
||||||
mChildrenCount(0), mProbability(0), mBigramProbability(0), mNodeCodePoint(0),
|
mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false),
|
||||||
mDepth(0), mLeavingDepth(0), mIsTerminal(false), mHasMultipleChars(false),
|
mHasChildren(false) {}
|
||||||
mHasChildren(false) {
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual ~DicNodeProperties() {}
|
virtual ~DicNodeProperties() {}
|
||||||
|
|
||||||
// Should be called only once per DicNode is initialized.
|
// Should be called only once per DicNode is initialized.
|
||||||
void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
|
void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
|
||||||
const int siblingPos, const int nodeCodePoint, const int childrenCount,
|
const int nodeCodePoint, const int probability, const bool isTerminal,
|
||||||
const int probability, const int bigramProbability, const bool isTerminal,
|
const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) {
|
||||||
const bool hasMultipleChars, const bool hasChildren, const uint16_t depth,
|
|
||||||
const uint16_t terminalDepth) {
|
|
||||||
mPos = pos;
|
mPos = pos;
|
||||||
mFlags = flags;
|
mFlags = flags;
|
||||||
mChildrenPos = childrenPos;
|
mChildrenPos = childrenPos;
|
||||||
mAttributesPos = attributesPos;
|
mAttributesPos = attributesPos;
|
||||||
mSiblingPos = siblingPos;
|
|
||||||
mNodeCodePoint = nodeCodePoint;
|
mNodeCodePoint = nodeCodePoint;
|
||||||
mChildrenCount = childrenCount;
|
|
||||||
mProbability = probability;
|
mProbability = probability;
|
||||||
mBigramProbability = bigramProbability;
|
|
||||||
mIsTerminal = isTerminal;
|
mIsTerminal = isTerminal;
|
||||||
mHasMultipleChars = hasMultipleChars;
|
|
||||||
mHasChildren = hasChildren;
|
mHasChildren = hasChildren;
|
||||||
mDepth = depth;
|
mDepth = depth;
|
||||||
mLeavingDepth = terminalDepth;
|
mLeavingDepth = leavingDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init for copy
|
// Init for copy
|
||||||
|
@ -66,13 +60,9 @@ class DicNodeProperties {
|
||||||
mFlags = nodeProp->mFlags;
|
mFlags = nodeProp->mFlags;
|
||||||
mChildrenPos = nodeProp->mChildrenPos;
|
mChildrenPos = nodeProp->mChildrenPos;
|
||||||
mAttributesPos = nodeProp->mAttributesPos;
|
mAttributesPos = nodeProp->mAttributesPos;
|
||||||
mSiblingPos = nodeProp->mSiblingPos;
|
|
||||||
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
mNodeCodePoint = nodeProp->mNodeCodePoint;
|
||||||
mChildrenCount = nodeProp->mChildrenCount;
|
|
||||||
mProbability = nodeProp->mProbability;
|
mProbability = nodeProp->mProbability;
|
||||||
mBigramProbability = nodeProp->mBigramProbability;
|
|
||||||
mIsTerminal = nodeProp->mIsTerminal;
|
mIsTerminal = nodeProp->mIsTerminal;
|
||||||
mHasMultipleChars = nodeProp->mHasMultipleChars;
|
|
||||||
mHasChildren = nodeProp->mHasChildren;
|
mHasChildren = nodeProp->mHasChildren;
|
||||||
mDepth = nodeProp->mDepth;
|
mDepth = nodeProp->mDepth;
|
||||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||||
|
@ -84,13 +74,9 @@ class DicNodeProperties {
|
||||||
mFlags = nodeProp->mFlags;
|
mFlags = nodeProp->mFlags;
|
||||||
mChildrenPos = nodeProp->mChildrenPos;
|
mChildrenPos = nodeProp->mChildrenPos;
|
||||||
mAttributesPos = nodeProp->mAttributesPos;
|
mAttributesPos = nodeProp->mAttributesPos;
|
||||||
mSiblingPos = nodeProp->mSiblingPos;
|
|
||||||
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||||
mChildrenCount = nodeProp->mChildrenCount;
|
|
||||||
mProbability = nodeProp->mProbability;
|
mProbability = nodeProp->mProbability;
|
||||||
mBigramProbability = nodeProp->mBigramProbability;
|
|
||||||
mIsTerminal = nodeProp->mIsTerminal;
|
mIsTerminal = nodeProp->mIsTerminal;
|
||||||
mHasMultipleChars = nodeProp->mHasMultipleChars;
|
|
||||||
mHasChildren = nodeProp->mHasChildren;
|
mHasChildren = nodeProp->mHasChildren;
|
||||||
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
|
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||||
mLeavingDepth = nodeProp->mLeavingDepth;
|
mLeavingDepth = nodeProp->mLeavingDepth;
|
||||||
|
@ -112,10 +98,6 @@ class DicNodeProperties {
|
||||||
return mAttributesPos;
|
return mAttributesPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getChildrenCount() const {
|
|
||||||
return mChildrenCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getProbability() const {
|
int getProbability() const {
|
||||||
return mProbability;
|
return mProbability;
|
||||||
}
|
}
|
||||||
|
@ -137,12 +119,8 @@ class DicNodeProperties {
|
||||||
return mIsTerminal;
|
return mIsTerminal;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasMultipleChars() const {
|
|
||||||
return mHasMultipleChars;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool hasChildren() const {
|
bool hasChildren() const {
|
||||||
return mChildrenCount > 0 || mDepth != mLeavingDepth;
|
return mHasChildren || mDepth != mLeavingDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool hasBlacklistedOrNotAWordFlag() const {
|
bool hasBlacklistedOrNotAWordFlag() const {
|
||||||
|
@ -153,25 +131,15 @@ class DicNodeProperties {
|
||||||
// Caution!!!
|
// Caution!!!
|
||||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
// for this class
|
// for this class
|
||||||
|
|
||||||
// Not used
|
|
||||||
int getSiblingPos() const {
|
|
||||||
return mSiblingPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
int mPos;
|
int mPos;
|
||||||
uint8_t mFlags;
|
uint8_t mFlags;
|
||||||
int mChildrenPos;
|
int mChildrenPos;
|
||||||
int mAttributesPos;
|
int mAttributesPos;
|
||||||
int mSiblingPos;
|
|
||||||
int mChildrenCount;
|
|
||||||
int mProbability;
|
int mProbability;
|
||||||
int mBigramProbability; // not used for now
|
|
||||||
int mNodeCodePoint;
|
int mNodeCodePoint;
|
||||||
uint16_t mDepth;
|
uint16_t mDepth;
|
||||||
uint16_t mLeavingDepth;
|
uint16_t mLeavingDepth;
|
||||||
bool mIsTerminal;
|
bool mIsTerminal;
|
||||||
bool mHasMultipleChars;
|
|
||||||
bool mHasChildren;
|
bool mHasChildren;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -55,11 +55,12 @@ class DicNodeState {
|
||||||
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
|
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init by copy and adding subword
|
// Init by copy and adding merged node code points.
|
||||||
void init(const DicNodeState *const src, const uint16_t additionalSubwordLength,
|
void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
|
||||||
const int *const additionalSubword) {
|
const int *const mergedNodeCodePoints) {
|
||||||
init(src);
|
init(src);
|
||||||
mDicNodeStateOutput.addSubword(additionalSubwordLength, additionalSubword);
|
mDicNodeStateOutput.addMergedNodeCodePoints(
|
||||||
|
mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -26,50 +26,52 @@ namespace latinime {
|
||||||
|
|
||||||
class DicNodeStateOutput {
|
class DicNodeStateOutput {
|
||||||
public:
|
public:
|
||||||
DicNodeStateOutput() : mOutputtedLength(0) {
|
DicNodeStateOutput() : mOutputtedCodePointCount(0) {
|
||||||
init();
|
init();
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~DicNodeStateOutput() {}
|
virtual ~DicNodeStateOutput() {}
|
||||||
|
|
||||||
void init() {
|
void init() {
|
||||||
mOutputtedLength = 0;
|
mOutputtedCodePointCount = 0;
|
||||||
mWordBuf[0] = 0;
|
mCodePointsBuf[0] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void init(const DicNodeStateOutput *const stateOutput) {
|
void init(const DicNodeStateOutput *const stateOutput) {
|
||||||
memcpy(mWordBuf, stateOutput->mWordBuf,
|
memcpy(mCodePointsBuf, stateOutput->mCodePointsBuf,
|
||||||
stateOutput->mOutputtedLength * sizeof(mWordBuf[0]));
|
stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0]));
|
||||||
mOutputtedLength = stateOutput->mOutputtedLength;
|
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
|
||||||
if (mOutputtedLength < MAX_WORD_LENGTH) {
|
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
|
||||||
mWordBuf[mOutputtedLength] = 0;
|
mCodePointsBuf[mOutputtedCodePointCount] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void addSubword(const uint16_t additionalSubwordLength, const int *const additionalSubword) {
|
void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
|
||||||
if (additionalSubword) {
|
const int *const mergedNodeCodePoints) {
|
||||||
memcpy(&mWordBuf[mOutputtedLength], additionalSubword,
|
if (mergedNodeCodePoints) {
|
||||||
additionalSubwordLength * sizeof(mWordBuf[0]));
|
memcpy(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
|
||||||
mOutputtedLength = static_cast<uint16_t>(mOutputtedLength + additionalSubwordLength);
|
mergedNodeCodePointCount * sizeof(mCodePointsBuf[0]));
|
||||||
if (mOutputtedLength < MAX_WORD_LENGTH) {
|
mOutputtedCodePointCount = static_cast<uint16_t>(
|
||||||
mWordBuf[mOutputtedLength] = 0;
|
mOutputtedCodePointCount + mergedNodeCodePointCount);
|
||||||
|
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
|
||||||
|
mCodePointsBuf[mOutputtedCodePointCount] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Remove
|
// TODO: Remove
|
||||||
int getCodePointAt(const int id) const {
|
int getCodePointAt(const int index) const {
|
||||||
return mWordBuf[id];
|
return mCodePointsBuf[index];
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Move to private
|
// TODO: Move to private
|
||||||
int mWordBuf[MAX_WORD_LENGTH];
|
int mCodePointsBuf[MAX_WORD_LENGTH];
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Caution!!!
|
// Caution!!!
|
||||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
// for this class
|
// for this class
|
||||||
uint16_t mOutputtedLength;
|
uint16_t mOutputtedCodePointCount;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
|
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H
|
||||||
|
|
|
@ -36,23 +36,17 @@ namespace latinime {
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const int prevWordNodePos, DicNode *const newRootNode) {
|
const int prevWordNodePos, DicNode *const newRootNode) {
|
||||||
int curPos = binaryDictionaryInfo->getRootPosition();
|
const int rootPos = binaryDictionaryInfo->getRootPosition();
|
||||||
const int pos = curPos;
|
const int childrenPos = rootPos;
|
||||||
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
|
newRootNode->initAsRoot(rootPos, childrenPos, prevWordNodePos);
|
||||||
binaryDictionaryInfo->getDictRoot(), &curPos);
|
|
||||||
const int childrenPos = curPos;
|
|
||||||
newRootNode->initAsRoot(pos, childrenPos, childrenCount, prevWordNodePos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
|
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
|
||||||
int curPos = binaryDictionaryInfo->getRootPosition();
|
const int rootPos = binaryDictionaryInfo->getRootPosition();
|
||||||
const int pos = curPos;
|
const int childrenPos = rootPos;
|
||||||
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
|
newRootNode->initAsRootWithPreviousWord(prevWordLastNode, rootPos, childrenPos);
|
||||||
binaryDictionaryInfo->getDictRoot(), &curPos);
|
|
||||||
const int childrenPos = curPos;
|
|
||||||
newRootNode->initAsRootWithPreviousWord(prevWordLastNode, pos, childrenPos, childrenCount);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
|
||||||
|
@ -76,7 +70,7 @@ namespace latinime {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
||||||
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
|
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
|
||||||
DicNodeVector *childDicNodes) {
|
DicNodeVector *childDicNodes) {
|
||||||
|
@ -90,11 +84,10 @@ namespace latinime {
|
||||||
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
|
||||||
binaryDictionaryInfo->getDictRoot(), &pos);
|
binaryDictionaryInfo->getDictRoot(), &pos);
|
||||||
ASSERT(NOT_A_CODE_POINT != codePoint);
|
ASSERT(NOT_A_CODE_POINT != codePoint);
|
||||||
const int nodeCodePoint = codePoint;
|
|
||||||
// TODO: optimize this
|
// TODO: optimize this
|
||||||
int additionalWordBuf[MAX_WORD_LENGTH];
|
int mergedNodeCodePoints[MAX_WORD_LENGTH];
|
||||||
uint16_t additionalSubwordLength = 0;
|
uint16_t mergedNodeCodePointCount = 0;
|
||||||
additionalWordBuf[additionalSubwordLength++] = codePoint;
|
mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
const int nextCodePoint = hasMultipleChars
|
const int nextCodePoint = hasMultipleChars
|
||||||
|
@ -102,7 +95,7 @@ namespace latinime {
|
||||||
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
|
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
|
||||||
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
|
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
|
||||||
if (!isLastChar) {
|
if (!isLastChar) {
|
||||||
additionalWordBuf[additionalSubwordLength++] = nextCodePoint;
|
mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
|
||||||
}
|
}
|
||||||
codePoint = nextCodePoint;
|
codePoint = nextCodePoint;
|
||||||
} while (NOT_A_CODE_POINT != codePoint);
|
} while (NOT_A_CODE_POINT != codePoint);
|
||||||
|
@ -116,17 +109,14 @@ namespace latinime {
|
||||||
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
|
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
|
||||||
binaryDictionaryInfo->getDictRoot(), flags, pos);
|
binaryDictionaryInfo->getDictRoot(), flags, pos);
|
||||||
|
|
||||||
if (isDicNodeFilteredOut(nodeCodePoint, pInfo, codePointsFilter)) {
|
if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) {
|
||||||
return siblingPos;
|
return siblingPos;
|
||||||
}
|
}
|
||||||
if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, nodeCodePoint)) {
|
if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) {
|
||||||
return siblingPos;
|
return siblingPos;
|
||||||
}
|
}
|
||||||
const int childrenCount = hasChildren ? BinaryFormat::getGroupCountAndForwardPointer(
|
childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos,
|
||||||
binaryDictionaryInfo->getDictRoot(), &childrenPos) : 0;
|
probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||||
childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, siblingPos,
|
|
||||||
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
|
|
||||||
hasMultipleChars, hasChildren, additionalSubwordLength, additionalWordBuf);
|
|
||||||
return siblingPos;
|
return siblingPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -163,13 +153,16 @@ namespace latinime {
|
||||||
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
|
||||||
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
|
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
|
||||||
DicNodeVector *childDicNodes) {
|
DicNodeVector *childDicNodes) {
|
||||||
const int terminalDepth = dicNode->getLeavingDepth();
|
if (!dicNode->hasChildren()) {
|
||||||
const int childCount = dicNode->getChildrenCount();
|
return;
|
||||||
|
}
|
||||||
int nextPos = dicNode->getChildrenPos();
|
int nextPos = dicNode->getChildrenPos();
|
||||||
|
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
|
||||||
|
binaryDictionaryInfo->getDictRoot(), &nextPos);
|
||||||
for (int i = 0; i < childCount; i++) {
|
for (int i = 0; i < childCount; i++) {
|
||||||
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
|
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
|
||||||
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
|
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
|
||||||
terminalDepth, pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
|
pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
|
||||||
childDicNodes);
|
childDicNodes);
|
||||||
if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
|
if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
|
||||||
// All code points have been found.
|
// All code points have been found.
|
||||||
|
|
|
@ -72,7 +72,7 @@ class DicNodeUtils {
|
||||||
const std::vector<int> *const codePointsFilter,
|
const std::vector<int> *const codePointsFilter,
|
||||||
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
||||||
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
|
||||||
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth,
|
const BinaryDictionaryInfo *const binaryDictionaryInfo,
|
||||||
const ProximityInfoState *pInfoState, const int pointIndex,
|
const ProximityInfoState *pInfoState, const int pointIndex,
|
||||||
const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
const bool exactOnly, const std::vector<int> *const codePointsFilter,
|
||||||
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
|
||||||
|
|
|
@ -63,16 +63,13 @@ class DicNodeVector {
|
||||||
}
|
}
|
||||||
|
|
||||||
void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
|
void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
|
||||||
const int childrenPos, const int attributesPos, const int siblingPos,
|
const int childrenPos, const int attributesPos, const int probability,
|
||||||
const int nodeCodePoint, const int childrenCount, const int probability,
|
const bool isTerminal, const bool hasChildren, const uint16_t mergedNodeCodePointCount,
|
||||||
const int bigramProbability, const bool isTerminal, const bool hasMultipleChars,
|
const int *const mergedNodeCodePoints) {
|
||||||
const bool hasChildren, const uint16_t additionalSubwordLength,
|
|
||||||
const int *additionalSubword) {
|
|
||||||
ASSERT(!mLock);
|
ASSERT(!mLock);
|
||||||
mDicNodes.push_back(mEmptyNode);
|
mDicNodes.push_back(mEmptyNode);
|
||||||
mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, siblingPos,
|
mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, probability,
|
||||||
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
|
isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
|
||||||
hasMultipleChars, hasChildren, additionalSubwordLength, additionalSubword);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DicNode *operator[](const int id) {
|
DicNode *operator[](const int id) {
|
||||||
|
|
Loading…
Reference in New Issue