am 21661706: Merge "Implement split and create node methods."

* commit '216617062a92bf11947f1dca3494775450388ca1':
  Implement split and create node methods.
main
Keisuke Kuroyanagi 2013-09-09 23:41:00 -07:00 committed by Android Git Automerger
commit 5599b04a0b
2 changed files with 115 additions and 19 deletions

View File

@ -47,14 +47,16 @@ bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
const int nodeCodePointCount = nodeReader->getCodePointCount(); const int nodeCodePointCount = nodeReader->getCodePointCount();
for (int j = 1; j < nodeCodePointCount; ++j) { for (int j = 1; j < nodeCodePointCount; ++j) {
const int nextIndex = matchedCodePointCount + j; const int nextIndex = matchedCodePointCount + j;
if (nextIndex >= codePointCount) { if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
// TODO: split current node after j - 1, create child and make this terminal.
return false;
}
if (!readingHelper->isMatchedCodePoint(j,
wordCodePoints[matchedCodePointCount + j])) { wordCodePoints[matchedCodePointCount + j])) {
// TODO: split current node after j - 1 and create two children. if (ENABLE_DYNAMIC_UPDATE) {
return false; return reallocatePtNodeAndAddNewPtNodes(nodeReader,
readingHelper->getMergedNodeCodePoints(), j, probability,
wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount);
} else {
return false;
}
} }
} }
// All characters are matched. // All characters are matched.
@ -145,8 +147,8 @@ bool DynamicPatriciaTrieWritingHelper::markNodeAsMovedAndSetPosition(
return true; return true;
} }
// Write new node at writingPos. // Write new PtNode at writingPos.
bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlacklisted, bool DynamicPatriciaTrieWritingHelper::writePtNodeWithFullInfoToBuffer(const bool isBlacklisted,
const bool isNotAWord, const int parentPos, const int *const codePoints, const bool isNotAWord, const int parentPos, const int *const codePoints,
const int codePointCount, const int probability, const int childrenPos, const int codePointCount, const int probability, const int childrenPos,
const int originalBigramListPos, const int originalShortcutListPos, const int originalBigramListPos, const int originalShortcutListPos,
@ -203,6 +205,25 @@ bool DynamicPatriciaTrieWritingHelper::writeNodeToBuffer(const bool isBlackliste
return true; return true;
} }
// Writes a PtNode at *writingPos using default attributes: not blacklisted, not flagged as
// "not a word", and NOT_A_DICT_POS for the children, bigram list and shortcut list positions.
// Returns the result of writePtNodeWithFullInfoToBuffer().
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBuffer(const int parentPos,
        const int *const codePoints, const int codePointCount, const int probability,
        int *const writingPos) {
    // Fixed attributes used for every PtNode written through this shortcut.
    const bool blacklisted = false;
    const bool notAWord = false;
    const int noChildrenPos = NOT_A_DICT_POS;
    const int noOriginalBigramsPos = NOT_A_DICT_POS;
    const int noOriginalShortcutPos = NOT_A_DICT_POS;
    const bool isWritten = writePtNodeWithFullInfoToBuffer(blacklisted, notAWord, parentPos,
            codePoints, codePointCount, probability, noChildrenPos, noOriginalBigramsPos,
            noOriginalShortcutPos, writingPos);
    return isWritten;
}
// Writes a PtNode at *writingPos that takes its flags (blacklisted / not-a-word) and its
// children, bigram list and shortcut list positions from originalNode, while the parent
// position, code points and probability are the ones given by the caller.
// Returns the result of writePtNodeWithFullInfoToBuffer().
bool DynamicPatriciaTrieWritingHelper::writePtNodeToBufferByCopyingPtNodeInfo(
        const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
        const int *const codePoints, const int codePointCount, const int probability,
        int *const writingPos) {
    // Attributes inherited from the original PtNode.
    const bool blacklisted = originalNode->isBlacklisted();
    const bool notAWord = originalNode->isNotAWord();
    const int inheritedChildrenPos = originalNode->getChildrenPos();
    const int inheritedBigramsPos = originalNode->getBigramsPos();
    const int inheritedShortcutPos = originalNode->getShortcutPos();
    const bool isWritten = writePtNodeWithFullInfoToBuffer(blacklisted, notAWord, parentPos,
            codePoints, codePointCount, probability, inheritedChildrenPos,
            inheritedBigramsPos, inheritedShortcutPos, writingPos);
    return isWritten;
}
bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, bool DynamicPatriciaTrieWritingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, const int *const nodeCodePoints, const int nodeCodePointCount, const int probability,
int *const forwardLinkFieldPos) { int *const forwardLinkFieldPos) {
@ -231,10 +252,8 @@ bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) { if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos)) {
return false; return false;
} }
if (!writeNodeToBuffer(originalPtNode->isBlacklisted(), originalPtNode->isNotAWord(), if (!writePtNodeToBufferByCopyingPtNodeInfo(originalPtNode, originalPtNode->getParentPos(),
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(), codePoints, originalPtNode->getCodePointCount(), probability, &movedPos)) {
probability, originalPtNode->getChildrenPos(), originalPtNode->getBigramsPos(),
originalPtNode->getShortcutPos(), &movedPos)) {
return false; return false;
} }
} }
@ -262,9 +281,7 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
1 /* arraySize */, &writingPos)) { 1 /* arraySize */, &writingPos)) {
return false; return false;
} }
if (!writeNodeToBuffer(false /* isBlacklisted */, false /* isNotAWord */, parentPtNodePos, if (!writePtNodeToBuffer(parentPtNodePos, nodeCodePoints, nodeCodePointCount, probability,
nodeCodePoints, nodeCodePointCount, probability, NOT_A_DICT_POS /* childrenPos */,
NOT_A_DICT_POS /* originalBigramsPos */, NOT_A_DICT_POS /* originalShortcutPos */,
&writingPos)) { &writingPos)) {
return false; return false;
} }
@ -275,4 +292,69 @@ bool DynamicPatriciaTrieWritingHelper::createNewPtNodeArrayWithAChildPtNode(
return true; return true;
} }
// Splits an existing PtNode after its first overlappingCodePointCount code points and
// re-creates it at the tail of mBuffer so that a new word sharing that prefix can be stored.
//
// reallocatingPtNode:           the PtNode to be split; it gets marked as moved.
// reallocatingPtNodeCodePoints: code points of the PtNode to be split.
// overlappingCodePointCount:    number of leading code points kept in the 1st part.
// probabilityOfNewPtNode:       probability of the word being added.
// newNodeCodePoints:            code points of the word being added, starting at the same
//                               position as reallocatingPtNodeCodePoints.
// newNodeCodePointCount:        number of code points in newNodeCodePoints.
//
// Returns whether the dictionary update succeeded.
bool DynamicPatriciaTrieWritingHelper::reallocatePtNodeAndAddNewPtNodes(
const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add a new child.
//   Reallocating PtNode: abcde, newNode: abcxy.
//     abc (1st, not terminal) __ de (2nd)
//                            \_ xy (extra child, terminal)
// Otherwise, this method makes the 1st part terminal and writes probabilityOfNewPtNode to it.
//   Reallocating PtNode: abcde, newNode: abc.
//     abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
// Everything below is appended starting at the current tail of the buffer.
const int firstPtNodePos = mBuffer->getTailPosition();
// The original PtNode is marked as moved before its replacement is written.
// NOTE(review): no rollback is attempted in this method if a later write fails, so the
// original PtNode may stay marked as moved while this method returns false - confirm callers
// treat a false return as fatal for the buffer.
if (!markNodeAsMovedAndSetPosition(reallocatingPtNode, firstPtNodePos)) {
return false;
}
int writingPos = firstPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position.
// The 1st part is written with writePtNodeToBuffer(), i.e. without the original PtNode's
// flags, bigram list or shortcut list; those are copied to the 2nd part below.
const int newProbability = addsExtraChild ? NOT_A_PROBABILITY : probabilityOfNewPtNode;
if (!writePtNodeToBuffer(reallocatingPtNode->getParentPos(), reallocatingPtNodeCodePoints,
overlappingCodePointCount, newProbability, &writingPos)) {
return false;
}
// The children PtNode array is written at the position writingPos holds after the 1st part
// (presumably right behind it - assumes the write helper advances writingPos).
const int actualChildrenPos = writingPos;
// Create new children PtNode array.
const size_t newPtNodeCount = addsExtraChild ? 2 : 1;
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
newPtNodeCount, &writingPos)) {
return false;
}
// Write the 2nd part of the reallocating node.
// It keeps the original PtNode's flags, children, bigram list, shortcut list and probability;
// only its code points shrink to the part after the overlap.
// NOTE(review): the parent position passed here (and for the extra child below) is the
// original PtNode's position (getNodePos()), not firstPtNodePos where the 1st part now
// lives. Presumably readers resolve it through the "moved" mark - TODO confirm.
if (!writePtNodeToBufferByCopyingPtNodeInfo(reallocatingPtNode,
reallocatingPtNode->getNodePos(),
reallocatingPtNodeCodePoints + overlappingCodePointCount,
reallocatingPtNode->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNode->getProbability(), &writingPos)) {
return false;
}
if (addsExtraChild) {
// Write the remainder of the new word as a second child carrying the new probability.
if (!writePtNodeToBuffer(reallocatingPtNode->getNodePos(),
newNodeCodePoints + overlappingCodePointCount,
newNodeCodePointCount - overlappingCodePointCount, probabilityOfNewPtNode,
&writingPos)) {
return false;
}
}
// Finish the new PtNode array with a forward link field holding NOT_A_DICT_POS
// (presumably meaning that no further PtNode array follows this one).
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
NOT_A_DICT_POS /* forwardLinkPos */, &writingPos)) {
return false;
}
// Load node info. Information of the 1st part will be fetched.
// Reading the 1st part back is how the position of its children position field is obtained.
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoFromBuffer(firstPtNodePos);
// Update children position.
// Overwrites the placeholder written with the 1st part so that it points to the new array.
int childrenPosFieldPos = nodeReader.getChildrenPosFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeChildrenPositionAndAdvancePosition(mBuffer,
actualChildrenPos, &childrenPosFieldPos)) {
return false;
}
return true;
}
} // namespace latinime } // namespace latinime

View File

@ -57,11 +57,19 @@ class DynamicPatriciaTrieWritingHelper {
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate, bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
const int movedPos); const int movedPos);
bool writeNodeToBuffer(const bool isBlacklisted, const bool isNotAWord, const int parentPos, bool writePtNodeWithFullInfoToBuffer(const bool isBlacklisted, const bool isNotAWord,
const int *const codePoints, const int codePointCount, const int probability, const int parentPos, const int *const codePoints, const int codePointCount,
const int childrenPos, const int originalBigramListPos, const int probability, const int childrenPos, const int originalBigramListPos,
const int originalShortcutListPos, int *const writingPos); const int originalShortcutListPos, int *const writingPos);
bool writePtNodeToBuffer(const int parentPos, const int *const codePoints,
const int codePointCount, const int probability, int *const writingPos);
bool writePtNodeToBufferByCopyingPtNodeInfo(
const DynamicPatriciaTrieNodeReader *const originalNode, const int parentPos,
const int *const codePoints, const int codePointCount, const int probability,
int *const writingPos);
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos); const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
@ -74,6 +82,12 @@ class DynamicPatriciaTrieWritingHelper {
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability); const int nodeCodePointCount, const int probability);
bool reallocatePtNodeAndAddNewPtNodes(
const DynamicPatriciaTrieNodeReader *const reallocatingPtNode,
const int *const reallocatingPtNodeCodePoints, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */