Merge "Remove unigram probability from dicNode."
This commit is contained in:
commit
4634b75d34
6 changed files with 11 additions and 32 deletions
|
@ -137,15 +137,13 @@ class DicNode {
|
||||||
}
|
}
|
||||||
|
|
||||||
void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
|
void initAsChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
|
||||||
const int unigramProbability, const int wordId,
|
const int wordId, const CodePointArrayView mergedCodePoints) {
|
||||||
const CodePointArrayView mergedCodePoints) {
|
|
||||||
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
uint16_t newDepth = static_cast<uint16_t>(dicNode->getNodeCodePointCount() + 1);
|
||||||
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
mIsCachedForNextSuggestion = dicNode->mIsCachedForNextSuggestion;
|
||||||
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
const uint16_t newLeavingDepth = static_cast<uint16_t>(
|
||||||
dicNode->mDicNodeProperties.getLeavingDepth() + mergedCodePoints.size());
|
dicNode->mDicNodeProperties.getLeavingDepth() + mergedCodePoints.size());
|
||||||
mDicNodeProperties.init(childrenPtNodeArrayPos, mergedCodePoints[0],
|
mDicNodeProperties.init(childrenPtNodeArrayPos, mergedCodePoints[0],
|
||||||
unigramProbability, wordId, newDepth, newLeavingDepth,
|
wordId, newDepth, newLeavingDepth, dicNode->mDicNodeProperties.getPrevWordIds());
|
||||||
dicNode->mDicNodeProperties.getPrevWordIds());
|
|
||||||
mDicNodeState.init(&dicNode->mDicNodeState, mergedCodePoints.size(),
|
mDicNodeState.init(&dicNode->mDicNodeState, mergedCodePoints.size(),
|
||||||
mergedCodePoints.data());
|
mergedCodePoints.data());
|
||||||
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
PROF_NODE_COPY(&dicNode->mProfiler, mProfiler);
|
||||||
|
@ -215,11 +213,6 @@ class DicNode {
|
||||||
return mDicNodeProperties.getChildrenPtNodeArrayPos();
|
return mDicNodeProperties.getChildrenPtNodeArrayPos();
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Remove
|
|
||||||
int getUnigramProbability() const {
|
|
||||||
return mDicNodeProperties.getUnigramProbability();
|
|
||||||
}
|
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isTerminalDicNode() const {
|
AK_FORCE_INLINE bool isTerminalDicNode() const {
|
||||||
const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
|
const bool isTerminalPtNode = mDicNodeProperties.isTerminal();
|
||||||
const int currentDicNodeDepth = getNodeCodePointCount();
|
const int currentDicNodeDepth = getNodeCodePointCount();
|
||||||
|
|
|
@ -60,12 +60,10 @@ class DicNodeVector {
|
||||||
}
|
}
|
||||||
|
|
||||||
void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
|
void pushLeavingChild(const DicNode *const dicNode, const int childrenPtNodeArrayPos,
|
||||||
const int unigramProbability, const int wordId,
|
const int wordId, const CodePointArrayView mergedCodePoints) {
|
||||||
const CodePointArrayView mergedCodePoints) {
|
|
||||||
ASSERT(!mLock);
|
ASSERT(!mLock);
|
||||||
mDicNodes.emplace_back();
|
mDicNodes.emplace_back();
|
||||||
mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, unigramProbability,
|
mDicNodes.back().initAsChild(dicNode, childrenPtNodeArrayPos, wordId, mergedCodePoints);
|
||||||
wordId, mergedCodePoints);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
DicNode *operator[](const int id) {
|
DicNode *operator[](const int id) {
|
||||||
|
|
|
@ -29,19 +29,16 @@ namespace latinime {
|
||||||
class DicNodeProperties {
|
class DicNodeProperties {
|
||||||
public:
|
public:
|
||||||
AK_FORCE_INLINE DicNodeProperties()
|
AK_FORCE_INLINE DicNodeProperties()
|
||||||
: mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mUnigramProbability(NOT_A_PROBABILITY),
|
: mChildrenPtNodeArrayPos(NOT_A_DICT_POS), mDicNodeCodePoint(NOT_A_CODE_POINT),
|
||||||
mDicNodeCodePoint(NOT_A_CODE_POINT), mWordId(NOT_A_WORD_ID), mDepth(0),
|
mWordId(NOT_A_WORD_ID), mDepth(0), mLeavingDepth(0) {}
|
||||||
mLeavingDepth(0) {}
|
|
||||||
|
|
||||||
~DicNodeProperties() {}
|
~DicNodeProperties() {}
|
||||||
|
|
||||||
// Should be called only once per DicNode is initialized.
|
// Should be called only once per DicNode is initialized.
|
||||||
void init(const int childrenPos, const int nodeCodePoint, const int unigramProbability,
|
void init(const int childrenPos, const int nodeCodePoint, const int wordId,
|
||||||
const int wordId, const uint16_t depth, const uint16_t leavingDepth,
|
const uint16_t depth, const uint16_t leavingDepth, const int *const prevWordIds) {
|
||||||
const int *const prevWordIds) {
|
|
||||||
mChildrenPtNodeArrayPos = childrenPos;
|
mChildrenPtNodeArrayPos = childrenPos;
|
||||||
mDicNodeCodePoint = nodeCodePoint;
|
mDicNodeCodePoint = nodeCodePoint;
|
||||||
mUnigramProbability = unigramProbability;
|
|
||||||
mWordId = wordId;
|
mWordId = wordId;
|
||||||
mDepth = depth;
|
mDepth = depth;
|
||||||
mLeavingDepth = leavingDepth;
|
mLeavingDepth = leavingDepth;
|
||||||
|
@ -52,7 +49,6 @@ class DicNodeProperties {
|
||||||
void init(const int rootPtNodeArrayPos, const int *const prevWordIds) {
|
void init(const int rootPtNodeArrayPos, const int *const prevWordIds) {
|
||||||
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
|
mChildrenPtNodeArrayPos = rootPtNodeArrayPos;
|
||||||
mDicNodeCodePoint = NOT_A_CODE_POINT;
|
mDicNodeCodePoint = NOT_A_CODE_POINT;
|
||||||
mUnigramProbability = NOT_A_PROBABILITY;
|
|
||||||
mWordId = NOT_A_WORD_ID;
|
mWordId = NOT_A_WORD_ID;
|
||||||
mDepth = 0;
|
mDepth = 0;
|
||||||
mLeavingDepth = 0;
|
mLeavingDepth = 0;
|
||||||
|
@ -62,7 +58,6 @@ class DicNodeProperties {
|
||||||
void initByCopy(const DicNodeProperties *const dicNodeProp) {
|
void initByCopy(const DicNodeProperties *const dicNodeProp) {
|
||||||
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
||||||
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
|
mDicNodeCodePoint = dicNodeProp->mDicNodeCodePoint;
|
||||||
mUnigramProbability = dicNodeProp->mUnigramProbability;
|
|
||||||
mWordId = dicNodeProp->mWordId;
|
mWordId = dicNodeProp->mWordId;
|
||||||
mDepth = dicNodeProp->mDepth;
|
mDepth = dicNodeProp->mDepth;
|
||||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||||
|
@ -73,7 +68,6 @@ class DicNodeProperties {
|
||||||
void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
|
void init(const DicNodeProperties *const dicNodeProp, const int codePoint) {
|
||||||
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
mChildrenPtNodeArrayPos = dicNodeProp->mChildrenPtNodeArrayPos;
|
||||||
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
mDicNodeCodePoint = codePoint; // Overwrite the node char of a passing child
|
||||||
mUnigramProbability = dicNodeProp->mUnigramProbability;
|
|
||||||
mWordId = dicNodeProp->mWordId;
|
mWordId = dicNodeProp->mWordId;
|
||||||
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
|
mDepth = dicNodeProp->mDepth + 1; // Increment the depth of a passing child
|
||||||
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
mLeavingDepth = dicNodeProp->mLeavingDepth;
|
||||||
|
@ -84,10 +78,6 @@ class DicNodeProperties {
|
||||||
return mChildrenPtNodeArrayPos;
|
return mChildrenPtNodeArrayPos;
|
||||||
}
|
}
|
||||||
|
|
||||||
int getUnigramProbability() const {
|
|
||||||
return mUnigramProbability;
|
|
||||||
}
|
|
||||||
|
|
||||||
int getDicNodeCodePoint() const {
|
int getDicNodeCodePoint() const {
|
||||||
return mDicNodeCodePoint;
|
return mDicNodeCodePoint;
|
||||||
}
|
}
|
||||||
|
@ -122,8 +112,6 @@ class DicNodeProperties {
|
||||||
// Use a default copy constructor and an assign operator because shallow copies are ok
|
// Use a default copy constructor and an assign operator because shallow copies are ok
|
||||||
// for this class
|
// for this class
|
||||||
int mChildrenPtNodeArrayPos;
|
int mChildrenPtNodeArrayPos;
|
||||||
// TODO: Remove
|
|
||||||
int mUnigramProbability;
|
|
||||||
int mDicNodeCodePoint;
|
int mDicNodeCodePoint;
|
||||||
int mWordId;
|
int mWordId;
|
||||||
uint16_t mDepth;
|
uint16_t mDepth;
|
||||||
|
|
|
@ -79,7 +79,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
||||||
}
|
}
|
||||||
const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
|
const int wordId = isTerminal ? ptNodeParams.getHeadPos() : NOT_A_WORD_ID;
|
||||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
||||||
ptNodeParams.getProbability(), wordId, ptNodeParams.getCodePointArrayView());
|
wordId, ptNodeParams.getCodePointArrayView());
|
||||||
}
|
}
|
||||||
if (readingHelper.isError()) {
|
if (readingHelper.isError()) {
|
||||||
mIsCorrupted = true;
|
mIsCorrupted = true;
|
||||||
|
|
|
@ -407,7 +407,7 @@ int PatriciaTriePolicy::createAndGetLeavingChildNode(const DicNode *const dicNod
|
||||||
// Skip PtNodes don't start with Unicode code point because they represent non-word information.
|
// Skip PtNodes don't start with Unicode code point because they represent non-word information.
|
||||||
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
|
if (CharUtils::isInUnicodeSpace(mergedNodeCodePoints[0])) {
|
||||||
const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
|
const int wordId = PatriciaTrieReadingUtils::isTerminal(flags) ? ptNodePos : NOT_A_WORD_ID;
|
||||||
childDicNodes->pushLeavingChild(dicNode, childrenPos, probability, wordId,
|
childDicNodes->pushLeavingChild(dicNode, childrenPos, wordId,
|
||||||
CodePointArrayView(mergedNodeCodePoints, mergedNodeCodePointCount));
|
CodePointArrayView(mergedNodeCodePoints, mergedNodeCodePointCount));
|
||||||
}
|
}
|
||||||
return siblingPos;
|
return siblingPos;
|
||||||
|
|
|
@ -69,7 +69,7 @@ void Ver4PatriciaTriePolicy::createAndGetAllChildDicNodes(const DicNode *const d
|
||||||
}
|
}
|
||||||
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
|
const int wordId = isTerminal ? ptNodeParams.getTerminalId() : NOT_A_WORD_ID;
|
||||||
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
childDicNodes->pushLeavingChild(dicNode, ptNodeParams.getChildrenPos(),
|
||||||
ptNodeParams.getProbability(), wordId, ptNodeParams.getCodePointArrayView());
|
wordId, ptNodeParams.getCodePointArrayView());
|
||||||
}
|
}
|
||||||
if (readingHelper.isError()) {
|
if (readingHelper.isError()) {
|
||||||
mIsCorrupted = true;
|
mIsCorrupted = true;
|
||||||
|
|
Loading…
Reference in a new issue