Use word id to construct DicNode instead of isTerminal flag.

Bug: 14425059
Change-Id: I8484d34756bd76668ece34211e7366a4758d7bf5
main
Keisuke Kuroyanagi 2014-08-26 14:33:19 +09:00
parent 4793e91317
commit 7d47500357
11 changed files with 23 additions and 16 deletions

View File

@ -299,6 +299,7 @@ static inline void prof_out(void) {
#define NOT_AN_INDEX (-1)
#define NOT_A_PROBABILITY (-1)
#define NOT_A_DICT_POS (S_INT_MIN)
#define NOT_A_WORD_ID (S_INT_MIN)
#define NOT_A_TIMESTAMP (-1)
#define NOT_A_LANGUAGE_WEIGHT (-1.0f)

View File

@ -136,7 +136,7 @@ class DicNode {
}
void initAsChild(const DicNode *const dicNode, const int ptNodePos,
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
const int childrenPtNodeArrayPos, const int probability, const int wordId,
const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
@ -144,7 +144,7 @@ class DicNode {
const uint16_t newLeavingDepth = static_cast<uint16_t>(
dicNode->mDicNodeProperties.getLeavingDepth() + mergedNodeCodePointCount);
mDicNodeProperties.init(ptNodePos, childrenPtNodeArrayPos, mergedNodeCodePoints[0],
probability, isTerminal, hasChildren, isBlacklistedOrNotAWord, newDepth,
probability, wordId, hasChildren, isBlacklistedOrNotAWord, newDepth,
newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordsTerminalPtNodePos());
mDicNodeState.init(&dicNode->mDicNodeState, mergedNodeCodePointCount,
mergedNodeCodePoints);

View File

@ -59,13 +59,13 @@ class DicNodeVector {
}
void pushLeavingChild(const DicNode *const dicNode, const int ptNodePos,
const int childrenPtNodeArrayPos, const int probability, const bool isTerminal,
const int childrenPtNodeArrayPos, const int probability, const int wordId,
const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t mergedNodeCodePointCount, const int *const mergedNodeCodePoints) {
ASSERT(!mLock);
mDicNodes.emplace_back();
mDicNodes.back().initAsChild(dicNode, ptNodePos, childrenPtNodeArrayPos, probability,
isTerminal, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
wordId, hasChildren, isBlacklistedOrNotAWord, mergedNodeCodePointCount,
mergedNodeCodePoints);
}

View File

@ -31,20 +31,20 @@ class DicNodeProperties {
AK_FORCE_INLINE DicNodeProperties()
: mPtNodePos(NOT_A_DICT_POS), mChildrenPtNodeArrayPos(NOT_A_DICT_POS),
mProbability(NOT_A_PROBABILITY), mDicNodeCodePoint(NOT_A_CODE_POINT),
mIsTerminal(false), mHasChildrenPtNodes(false),
mWordId(NOT_A_WORD_ID), mHasChildrenPtNodes(false),
mIsBlacklistedOrNotAWord(false), mDepth(0), mLeavingDepth(0) {}
~DicNodeProperties() {}
// Should be called only once per DicNode is initialized.
void init(const int pos, const int childrenPos, const int nodeCodePoint, const int probability,
const bool isTerminal, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const int wordId, const bool hasChildren, const bool isBlacklistedOrNotAWord,
const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordsNodePos) {
mPtNodePos = pos;
mChildrenPtNodeArrayPos = childrenPos;
mDicNodeCodePoint = nodeCodePoint;
mProbability = probability;
mIsTerminal = isTerminal;
mWordId = wordId;
mHasChildrenPtNodes = hasChildren;
mIsBlacklistedOrNotAWord = isBlacklistedOrNotAWord;
mDepth = depth;
@ -58,7 +58,7 @@ class DicNodeProperties {
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
mDicNodeCodePoint = NOT_A_CODE_POINT;
mProbability = NOT_A_PROBABILITY;
mIsTerminal = false;
mWordId = NOT_A_WORD_ID;
mHasChildrenPtNodes = true;
mIsBlacklistedOrNotAWord = false;
mDepth = 0;
@ -71,7 +71,7 @@ class DicNodeProperties {
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
mProbability = dicNodeProp->mProbability;
mIsTerminal = dicNodeProp->mIsTerminal;
mWordId = dicNodeProp->mWordId;
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth;
@ -86,7 +86,7 @@ class DicNodeProperties {
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
mProbability = dicNodeProp->mProbability;
mIsTerminal = dicNodeProp->mIsTerminal;
mWordId = dicNodeProp->mWordId;
mHasChildrenPtNodes = dicNodeProp->mHasChildrenPtNodes;
mIsBlacklistedOrNotAWord = dicNodeProp->mIsBlacklistedOrNotAWord;
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
@ -121,7 +121,7 @@ class DicNodeProperties {
}
bool isTerminal() const {
return mIsTerminal;
return mWordId != NOT_A_WORD_ID;
}
bool hasChildren() const {
@ -144,7 +144,7 @@ class DicNodeProperties {
int mChildrenPtNodeArrayPos;
int mProbability;
int mDicNodeCodePoint;
bool mIsTerminal;
int mWordId;
bool mHasChildrenPtNodes;
bool mIsBlacklistedOrNotAWord;
uint16_t mDepth;

View File

@ -36,6 +36,7 @@ class UnigramProperty;
* This class abstracts the structure of dictionaries.
* Implement this policy to support additional dictionaries.
*/
// TODO: Use word id instead of terminal PtNode position.
class DictionaryStructureWithBufferPolicy {
public:
typedef std::unique_ptr<DictionaryStructureWithBufferPolicy> StructurePolicyPtr;

View File

@ -76,8 +76,9 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// Skip PtNodes that represent non-word information.
continue;
}
const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), wordId,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,

View File

@ -55,6 +55,7 @@ class DicNodeVector;
namespace backward {
namespace v402 {
// Word id = Position of a PtNode that represents the word.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)

View File

@ -367,8 +367,8 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
&probability, &childrenPos, &shortcutPos, &bigramPos, &siblingPos);
// Skip PtNodes don't start with Unicode code point because they represent non-word information.
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability,
PatriciaTrieReadingUtils::isTerminal(flags),
const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodePos, childrenPos, probability, wordId,
PatriciaTrieReadingUtils::hasChildrenInFlags(flags),
PatriciaTrieReadingUtils::isBlacklisted(flags)
|| PatriciaTrieReadingUtils::isNotAWord(flags),

View File

@ -36,6 +36,7 @@ namespace latinime {
class DicNode;
class DicNodeVector;
// Word id = Position of a PtNode that represents the word.
class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
PatriciaTriePolicy(MmappedBuffer::MmappedBufferPtr mmappedBuffer)

View File

@ -66,8 +66,9 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
// Skip PtNodes that represent non-word information.
continue;
}
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getHeadPos(),
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), isTerminal,
ptNodeParams.getChildrenPos(), ptNodeParams.getProbability(), wordId,
ptNodeParams.hasChildren(),
ptNodeParams.isBlacklisted()
|| ptNodeParams.isNotAWord() /* isBlacklistedOrNotAWord */,

View File

@ -37,6 +37,7 @@ namespace latinime {
class DicNode;
class DicNodeVector;
// Word id = Artificial id that is stored in the PtNode looked up by the word.
class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
public:
Ver4PatriciaTriePolicy(Ver4DictBuffers::Ver4DictBuffersPtr buffers)