Merge "Reduce the number of arguments required to initialize dic_node."

This commit is contained in:
Keisuke Kuroynagi 2013-06-27 03:30:47 +00:00 committed by Android (Google) Code Review
commit c96b56a5ec
7 changed files with 83 additions and 129 deletions

View file

@ -109,12 +109,14 @@ class DicNode {
// TODO: minimize arguments by looking at binary_format
// Init for root with prevWordNodePos which is used for bigram
void initAsRoot(const int pos, const int childrenPos, const int childrenCount,
const int prevWordNodePos) {
void initAsRoot(const int pos, const int childrenPos, const int prevWordNodePos) {
mIsUsed = true;
mIsCachedForNextSuggestion = false;
mDicNodeProperties.init(
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
pos, 0 /* flags */, childrenPos, 0 /* attributesPos */,
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
0 /* terminalDepth */);
mDicNodeState.init(prevWordNodePos);
PROF_NODE_RESET(mProfiler);
}
@ -130,12 +132,14 @@ class DicNode {
// TODO: minimize arguments by looking at binary_format
// Init for root with previous word
void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos,
const int childrenCount) {
void initAsRootWithPreviousWord(DicNode *dicNode, const int pos, const int childrenPos) {
mIsUsed = true;
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
mDicNodeProperties.init(
pos, 0, childrenPos, 0, 0, 0, childrenCount, 0, 0, false, false, true, 0, 0);
pos, 0 /* flags */, childrenPos, 0 /* attributesPos */,
NOT_A_CODE_POINT /* nodeCodePoint */, NOT_A_PROBABILITY /* probability */,
false /* isTerminal */, true /* hasChildren */, 0 /* depth */,
0 /* terminalDepth */);
// TODO: Move to dicNodeState?
mDicNodeState.mDicNodeStateOutput.init(); // reset for next word
mDicNodeState.mDicNodeStateInput.init(
@ -157,19 +161,18 @@ class DicNode {
// TODO: minimize arguments by looking at binary_format
void initAsChild(DicNode *dicNode, const int pos, const uint8_t flags, const int childrenPos,
const int attributesPos, const int siblingPos, const int nodeCodePoint,
const int childrenCount, const int probability, const int bigramProbability,
const bool isTerminal, const bool hasMultipleChars, const bool hasChildren,
const uint16_t additionalSubwordLength, const int *additionalSubword) {
const int attributesPos, const int probability, const bool isTerminal,
const bool hasChildren, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
mIsUsed = true;
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + additionalSubwordLength);
mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, siblingPos, nodeCodePoint,
childrenCount, probability, bigramProbability, isTerminal, hasMultipleChars,
hasChildren, newDepth, newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, additionalSubwordLength, additionalSubword);
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(pos, flags, childrenPos, attributesPos, mergedNodeCodePoints[0],
probability, isTerminal, hasChildren, newDepth, newLeavingDepth);
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
}
@ -193,8 +196,8 @@ class DicNode {
}
bool isLeavingNode() const {
ASSERT(getNodeCodePointCount() <= getLeavingDepth());
return getNodeCodePointCount() == getLeavingDepth();
ASSERT(getNodeCodePointCount() <= mDicNodeProperties.getLeavingDepth());
return getNodeCodePointCount() == mDicNodeProperties.getLeavingDepth();
}
AK_FORCE_INLINE bool isFirstLetter() const {
@ -256,12 +259,6 @@ class DicNode {
return mDicNodeProperties.getChildrenPos();
}
// Used in DicNodeUtils
int getChildrenCount() const {
return mDicNodeProperties.getChildrenCount();
}
// Used in DicNodeUtils
// Returns the probability held by this node's DicNodeProperties.
int getProbability() const {
return mDicNodeProperties.getProbability();
}
@ -280,10 +277,6 @@ class DicNode {
return !(currentDepth > 0 && (currentDepth != 1 || prevWordLen != 1));
}
uint16_t getLeavingDepth() const {
return mDicNodeProperties.getLeavingDepth();
}
bool isTotalInputSizeExceedingLimit() const {
const int prevWordsLen = mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength();
const int currentWordDepth = getNodeCodePointCount();
@ -370,7 +363,7 @@ class DicNode {
}
AK_FORCE_INLINE const int *getOutputWordBuf() const {
return mDicNodeState.mDicNodeStateOutput.mWordBuf;
return mDicNodeState.mDicNodeStateOutput.mCodePointsBuf;
}
int getPrevCodePointG(int pointerId) const {

View file

@ -27,37 +27,31 @@ namespace latinime {
/**
* Node for traversing the lexicon trie.
*/
// TODO: Introduce a dictionary node class which has attribute members required to understand the
// dictionary structure.
class DicNodeProperties {
public:
AK_FORCE_INLINE DicNodeProperties()
: mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mSiblingPos(0),
mChildrenCount(0), mProbability(0), mBigramProbability(0), mNodeCodePoint(0),
mDepth(0), mLeavingDepth(0), mIsTerminal(false), mHasMultipleChars(false),
mHasChildren(false) {
}
: mPos(0), mFlags(0), mChildrenPos(0), mAttributesPos(0), mProbability(0),
mNodeCodePoint(0), mDepth(0), mLeavingDepth(0), mIsTerminal(false),
mHasChildren(false) {}
virtual ~DicNodeProperties() {}
// Should be called only once each time a DicNode is initialized.
void init(const int pos, const uint8_t flags, const int childrenPos, const int attributesPos,
const int siblingPos, const int nodeCodePoint, const int childrenCount,
const int probability, const int bigramProbability, const bool isTerminal,
const bool hasMultipleChars, const bool hasChildren, const uint16_t depth,
const uint16_t terminalDepth) {
const int nodeCodePoint, const int probability, const bool isTerminal,
const bool hasChildren, const uint16_t depth, const uint16_t leavingDepth) {
mPos = pos;
mFlags = flags;
mChildrenPos = childrenPos;
mAttributesPos = attributesPos;
mSiblingPos = siblingPos;
mNodeCodePoint = nodeCodePoint;
mChildrenCount = childrenCount;
mProbability = probability;
mBigramProbability = bigramProbability;
mIsTerminal = isTerminal;
mHasMultipleChars = hasMultipleChars;
mHasChildren = hasChildren;
mDepth = depth;
mLeavingDepth = terminalDepth;
mLeavingDepth = leavingDepth;
}
// Init for copy
@ -66,13 +60,9 @@ class DicNodeProperties {
mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
mSiblingPos = nodeProp->mSiblingPos;
mNodeCodePoint = nodeProp->mNodeCodePoint;
mChildrenCount = nodeProp->mChildrenCount;
mProbability = nodeProp->mProbability;
mBigramProbability = nodeProp->mBigramProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasMultipleChars = nodeProp->mHasMultipleChars;
mHasChildren = nodeProp->mHasChildren;
mDepth = nodeProp->mDepth;
mLeavingDepth = nodeProp->mLeavingDepth;
@ -84,13 +74,9 @@ class DicNodeProperties {
mFlags = nodeProp->mFlags;
mChildrenPos = nodeProp->mChildrenPos;
mAttributesPos = nodeProp->mAttributesPos;
mSiblingPos = nodeProp->mSiblingPos;
mNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mChildrenCount = nodeProp->mChildrenCount;
mProbability = nodeProp->mProbability;
mBigramProbability = nodeProp->mBigramProbability;
mIsTerminal = nodeProp->mIsTerminal;
mHasMultipleChars = nodeProp->mHasMultipleChars;
mHasChildren = nodeProp->mHasChildren;
mDepth = nodeProp->mDepth + 1; // Increment the depth of a passing child
mLeavingDepth = nodeProp->mLeavingDepth;
@ -112,10 +98,6 @@ class DicNodeProperties {
return mAttributesPos;
}
int getChildrenCount() const {
return mChildrenCount;
}
// Returns mProbability, the value stored by init() or copied from another DicNodeProperties.
int getProbability() const {
return mProbability;
}
@ -137,12 +119,8 @@ class DicNodeProperties {
return mIsTerminal;
}
bool hasMultipleChars() const {
return mHasMultipleChars;
}
bool hasChildren() const {
return mChildrenCount > 0 || mDepth != mLeavingDepth;
return mHasChildren || mDepth != mLeavingDepth;
}
bool hasBlacklistedOrNotAWordFlag() const {
@ -153,25 +131,15 @@ class DicNodeProperties {
// Caution!!!
// Use the default copy constructor and assignment operator because shallow copies are ok
// for this class
// Not used
int getSiblingPos() const {
return mSiblingPos;
}
int mPos;
uint8_t mFlags;
int mChildrenPos;
int mAttributesPos;
int mSiblingPos;
int mChildrenCount;
int mProbability;
int mBigramProbability; // not used for now
int mNodeCodePoint;
uint16_t mDepth;
uint16_t mLeavingDepth;
bool mIsTerminal;
bool mHasMultipleChars;
bool mHasChildren;
};
} // namespace latinime

View file

@ -55,11 +55,12 @@ class DicNodeState {
mDicNodeStateScoring.init(&src->mDicNodeStateScoring);
}
// Init by copy and adding subword
void init(const DicNodeState *const src, const uint16_t additionalSubwordLength,
const int *const additionalSubword) {
// Init by copy and adding merged node code points.
void init(const DicNodeState *const src, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
init(src);
mDicNodeStateOutput.addSubword(additionalSubwordLength, additionalSubword);
mDicNodeStateOutput.addMergedNodeCodePoints(
mergedNodeCodePointCount, mergedNodeCodePoints);
}
private:

View file

@ -26,50 +26,52 @@ namespace latinime {
class DicNodeStateOutput {
public:
DicNodeStateOutput() : mOutputtedLength(0) {
// Starts with no outputted code points. init() resets the count to 0 and writes 0 into the
// first slot of the code point buffer.
DicNodeStateOutput() : mOutputtedCodePointCount(0) {
init();
}
virtual ~DicNodeStateOutput() {}
void init() {
mOutputtedLength = 0;
mWordBuf[0] = 0;
mOutputtedCodePointCount = 0;
mCodePointsBuf[0] = 0;
}
void init(const DicNodeStateOutput *const stateOutput) {
memcpy(mWordBuf, stateOutput->mWordBuf,
stateOutput->mOutputtedLength * sizeof(mWordBuf[0]));
mOutputtedLength = stateOutput->mOutputtedLength;
if (mOutputtedLength < MAX_WORD_LENGTH) {
mWordBuf[mOutputtedLength] = 0;
memcpy(mCodePointsBuf, stateOutput->mCodePointsBuf,
stateOutput->mOutputtedCodePointCount * sizeof(mCodePointsBuf[0]));
mOutputtedCodePointCount = stateOutput->mOutputtedCodePointCount;
if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
mCodePointsBuf[mOutputtedCodePointCount] = 0;
}
}
void addSubword(const uint16_t additionalSubwordLength, const int *const additionalSubword) {
if (additionalSubword) {
memcpy(&mWordBuf[mOutputtedLength], additionalSubword,
additionalSubwordLength * sizeof(mWordBuf[0]));
mOutputtedLength = static_cast<uint16_t>(mOutputtedLength + additionalSubwordLength);
if (mOutputtedLength < MAX_WORD_LENGTH) {
mWordBuf[mOutputtedLength] = 0;
// Appends the code points of a merged node to the end of mCodePointsBuf.
//
// mergedNodeCodePointCount: number of code points readable from mergedNodeCodePoints.
// mergedNodeCodePoints: code points to append; when null, nothing is changed.
//
// mCodePointsBuf has room for MAX_WORD_LENGTH code points. The number of code points
// copied is limited to the room that is left, so an over-long word is truncated instead
// of being written past the end of the buffer.
void addMergedNodeCodePoints(const uint16_t mergedNodeCodePointCount,
        const int *const mergedNodeCodePoints) {
    if (!mergedNodeCodePoints) {
        return;
    }
    const int remainingCapacity = MAX_WORD_LENGTH - mOutputtedCodePointCount;
    if (remainingCapacity <= 0) {
        // The buffer is already full: nothing can be appended and there is no slot left
        // for a terminating 0.
        return;
    }
    const int codePointCountToAppend =
            (mergedNodeCodePointCount < remainingCapacity)
                    ? static_cast<int>(mergedNodeCodePointCount) : remainingCapacity;
    memcpy(&mCodePointsBuf[mOutputtedCodePointCount], mergedNodeCodePoints,
            codePointCountToAppend * sizeof(mCodePointsBuf[0]));
    mOutputtedCodePointCount = static_cast<uint16_t>(
            mOutputtedCodePointCount + codePointCountToAppend);
    // Keep the buffer 0-terminated whenever a free slot remains after the appended data.
    if (mOutputtedCodePointCount < MAX_WORD_LENGTH) {
        mCodePointsBuf[mOutputtedCodePointCount] = 0;
    }
}
// TODO: Remove
int getCodePointAt(const int id) const {
return mWordBuf[id];
// Returns the code point stored at `index` in mCodePointsBuf. No range check is done here,
// so `index` has to be within [0, MAX_WORD_LENGTH), the size of the buffer.
int getCodePointAt(const int index) const {
return mCodePointsBuf[index];
}
// TODO: Move to private
int mWordBuf[MAX_WORD_LENGTH];
int mCodePointsBuf[MAX_WORD_LENGTH];
private:
// Caution!!!
// Use the default copy constructor and assignment operator because shallow copies are ok
// for this class
uint16_t mOutputtedLength;
uint16_t mOutputtedCodePointCount;
};
} // namespace latinime
#endif // LATINIME_DIC_NODE_STATE_OUTPUT_H

View file

@ -36,23 +36,17 @@ namespace latinime {
/* static */ void DicNodeUtils::initAsRoot(const BinaryDictionaryInfo *const binaryDictionaryInfo,
const int prevWordNodePos, DicNode *const newRootNode) {
int curPos = binaryDictionaryInfo->getRootPosition();
const int pos = curPos;
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &curPos);
const int childrenPos = curPos;
newRootNode->initAsRoot(pos, childrenPos, childrenCount, prevWordNodePos);
const int rootPos = binaryDictionaryInfo->getRootPosition();
const int childrenPos = rootPos;
newRootNode->initAsRoot(rootPos, childrenPos, prevWordNodePos);
}
/*static */ void DicNodeUtils::initAsRootWithPreviousWord(
const BinaryDictionaryInfo *const binaryDictionaryInfo,
DicNode *const prevWordLastNode, DicNode *const newRootNode) {
int curPos = binaryDictionaryInfo->getRootPosition();
const int pos = curPos;
const int childrenCount = BinaryFormat::getGroupCountAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &curPos);
const int childrenPos = curPos;
newRootNode->initAsRootWithPreviousWord(prevWordLastNode, pos, childrenPos, childrenCount);
const int rootPos = binaryDictionaryInfo->getRootPosition();
const int childrenPos = rootPos;
newRootNode->initAsRootWithPreviousWord(prevWordLastNode, rootPos, childrenPos);
}
/* static */ void DicNodeUtils::initByCopy(DicNode *srcNode, DicNode *destNode) {
@ -76,7 +70,7 @@ namespace latinime {
}
/* static */ int DicNodeUtils::createAndGetLeavingChildNode(DicNode *dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
DicNodeVector *childDicNodes) {
@ -90,11 +84,10 @@ namespace latinime {
int codePoint = BinaryFormat::getCodePointAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &pos);
ASSERT(NOT_A_CODE_POINT != codePoint);
const int nodeCodePoint = codePoint;
// TODO: optimize this
int additionalWordBuf[MAX_WORD_LENGTH];
uint16_t additionalSubwordLength = 0;
additionalWordBuf[additionalSubwordLength++] = codePoint;
int mergedNodeCodePoints[MAX_WORD_LENGTH];
uint16_t mergedNodeCodePointCount = 0;
mergedNodeCodePoints[mergedNodeCodePointCount++] = codePoint;
do {
const int nextCodePoint = hasMultipleChars
@ -102,7 +95,7 @@ namespace latinime {
binaryDictionaryInfo->getDictRoot(), &pos) : NOT_A_CODE_POINT;
const bool isLastChar = (NOT_A_CODE_POINT == nextCodePoint);
if (!isLastChar) {
additionalWordBuf[additionalSubwordLength++] = nextCodePoint;
mergedNodeCodePoints[mergedNodeCodePointCount++] = nextCodePoint;
}
codePoint = nextCodePoint;
} while (NOT_A_CODE_POINT != codePoint);
@ -116,17 +109,14 @@ namespace latinime {
const int siblingPos = BinaryFormat::skipChildrenPosAndAttributes(
binaryDictionaryInfo->getDictRoot(), flags, pos);
if (isDicNodeFilteredOut(nodeCodePoint, pInfo, codePointsFilter)) {
if (isDicNodeFilteredOut(mergedNodeCodePoints[0], pInfo, codePointsFilter)) {
return siblingPos;
}
if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, nodeCodePoint)) {
if (!isMatchedNodeCodePoint(pInfoState, pointIndex, exactOnly, mergedNodeCodePoints[0])) {
return siblingPos;
}
const int childrenCount = hasChildren ? BinaryFormat::getGroupCountAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &childrenPos) : 0;
childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos, siblingPos,
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
hasMultipleChars, hasChildren, additionalSubwordLength, additionalWordBuf);
childDicNodes->pushLeavingChild(dicNode, nextPos, flags, childrenPos, attributesPos,
probability, isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
return siblingPos;
}
@ -163,13 +153,16 @@ namespace latinime {
const ProximityInfoState *pInfoState, const int pointIndex, const bool exactOnly,
const std::vector<int> *const codePointsFilter, const ProximityInfo *const pInfo,
DicNodeVector *childDicNodes) {
const int terminalDepth = dicNode->getLeavingDepth();
const int childCount = dicNode->getChildrenCount();
if (!dicNode->hasChildren()) {
return;
}
int nextPos = dicNode->getChildrenPos();
const int childCount = BinaryFormat::getGroupCountAndForwardPointer(
binaryDictionaryInfo->getDictRoot(), &nextPos);
for (int i = 0; i < childCount; i++) {
const int filterSize = codePointsFilter ? codePointsFilter->size() : 0;
nextPos = createAndGetLeavingChildNode(dicNode, nextPos, binaryDictionaryInfo,
terminalDepth, pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
pInfoState, pointIndex, exactOnly, codePointsFilter, pInfo,
childDicNodes);
if (!pInfo && filterSize > 0 && childDicNodes->exceeds(filterSize)) {
// All code points have been found.

View file

@ -72,7 +72,7 @@ class DicNodeUtils {
const std::vector<int> *const codePointsFilter,
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);
static int createAndGetLeavingChildNode(DicNode *dicNode, int pos,
const BinaryDictionaryInfo *const binaryDictionaryInfo, const int terminalDepth,
const BinaryDictionaryInfo *const binaryDictionaryInfo,
const ProximityInfoState *pInfoState, const int pointIndex,
const bool exactOnly, const std::vector<int> *const codePointsFilter,
const ProximityInfo *const pInfo, DicNodeVector *childDicNodes);

View file

@ -63,16 +63,13 @@ class DicNodeVector {
}
void pushLeavingChild(DicNode *dicNode, const int pos, const uint8_t flags,
const int childrenPos, const int attributesPos, const int siblingPos,
const int nodeCodePoint, const int childrenCount, const int probability,
const int bigramProbability, const bool isTerminal, const bool hasMultipleChars,
const bool hasChildren, const uint16_t additionalSubwordLength,
const int *additionalSubword) {
const int childrenPos, const int attributesPos, const int probability,
const bool isTerminal, const bool hasChildren, const uint16_t mergedNodeCodePointCount,
const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.push_back(mEmptyNode);
mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, siblingPos,
nodeCodePoint, childrenCount, probability, -1 /* bigramProbability */, isTerminal,
hasMultipleChars, hasChildren, additionalSubwordLength, additionalSubword);
mDicNodes.back().initAsChild(dicNode, pos, flags, childrenPos, attributesPos, probability,
isTerminal, hasChildren, mergedNodeCodePointCount, mergedNodeCodePoints);
}
DicNode *operator[](const int id) {