Merge "Use IntArrayView in DynamicPtUpdatingHelper."
commit
cac9486550
|
@ -268,8 +268,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
|
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(),
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
|
||||||
codePointArrayView.size(), unigramProperty, &addedNewUnigram)) {
|
&addedNewUnigram)) {
|
||||||
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
|
@ -283,8 +283,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
|
||||||
}
|
}
|
||||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||||
if (!mUpdatingHelper.addShortcutTarget(wordPos,
|
if (!mUpdatingHelper.addShortcutTarget(wordPos,
|
||||||
shortcut.getTargetCodePoints()->data(),
|
CodePointArrayView(*shortcut.getTargetCodePoints()),
|
||||||
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
|
shortcut.getProbability())) {
|
||||||
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
|
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
|
||||||
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
|
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
|
||||||
shortcut.getProbability());
|
shortcut.getProbability());
|
||||||
|
|
|
@ -218,9 +218,9 @@ int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
|
||||||
}
|
}
|
||||||
|
|
||||||
int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
|
int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
|
||||||
const int length, const bool forceLowerCaseSearch) {
|
const size_t length, const bool forceLowerCaseSearch) {
|
||||||
int searchCodePoints[length];
|
int searchCodePoints[length];
|
||||||
for (int i = 0; i < length; ++i) {
|
for (size_t i = 0; i < length; ++i) {
|
||||||
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
|
||||||
}
|
}
|
||||||
while (!isEnd()) {
|
while (!isEnd()) {
|
||||||
|
|
|
@ -138,12 +138,12 @@ class DynamicPtReadingHelper {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return code point count exclude the last read node's code points.
|
// Return code point count exclude the last read node's code points.
|
||||||
AK_FORCE_INLINE int getPrevTotalCodePointCount() const {
|
AK_FORCE_INLINE size_t getPrevTotalCodePointCount() const {
|
||||||
return mReadingState.mTotalCodePointCountSinceInitialization;
|
return mReadingState.mTotalCodePointCountSinceInitialization;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return code point count include the last read node's code points.
|
// Return code point count include the last read node's code points.
|
||||||
AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
|
AK_FORCE_INLINE size_t getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
|
||||||
return mReadingState.mTotalCodePointCountSinceInitialization
|
return mReadingState.mTotalCodePointCountSinceInitialization
|
||||||
+ ptNodeParams.getCodePointCount();
|
+ ptNodeParams.getCodePointCount();
|
||||||
}
|
}
|
||||||
|
@ -214,7 +214,7 @@ class DynamicPtReadingHelper {
|
||||||
int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
|
int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
|
||||||
int *const outCodePoints, int *const outUnigramProbability);
|
int *const outCodePoints, int *const outUnigramProbability);
|
||||||
|
|
||||||
int getTerminalPtNodePositionOfWord(const int *const inWord, const int length,
|
int getTerminalPtNodePositionOfWord(const int *const inWord, const size_t length,
|
||||||
const bool forceLowerCaseSearch);
|
const bool forceLowerCaseSearch);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -234,7 +234,7 @@ class DynamicPtReadingHelper {
|
||||||
int mPos;
|
int mPos;
|
||||||
// Remaining node count in the current array.
|
// Remaining node count in the current array.
|
||||||
int mRemainingPtNodeCountInThisArray;
|
int mRemainingPtNodeCountInThisArray;
|
||||||
int mTotalCodePointCountSinceInitialization;
|
size_t mTotalCodePointCountSinceInitialization;
|
||||||
// Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
|
// Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
|
||||||
int mTotalPtNodeIndexInThisArrayChain;
|
int mTotalPtNodeIndexInThisArrayChain;
|
||||||
// Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
|
// Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty
|
||||||
|
|
|
@ -28,17 +28,16 @@ namespace latinime {
|
||||||
|
|
||||||
const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::addUnigramWord(
|
bool DynamicPtUpdatingHelper::addUnigramWord(DynamicPtReadingHelper *const readingHelper,
|
||||||
DynamicPtReadingHelper *const readingHelper,
|
const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty,
|
||||||
const int *const wordCodePoints, const int codePointCount,
|
bool *const outAddedNewUnigram) {
|
||||||
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
|
|
||||||
int parentPos = NOT_A_DICT_POS;
|
int parentPos = NOT_A_DICT_POS;
|
||||||
while (!readingHelper->isEnd()) {
|
while (!readingHelper->isEnd()) {
|
||||||
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
|
||||||
if (!ptNodeParams.isValid()) {
|
if (!ptNodeParams.isValid()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
const size_t matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
|
||||||
if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
|
||||||
wordCodePoints[matchedCodePointCount])) {
|
wordCodePoints[matchedCodePointCount])) {
|
||||||
// The first code point is different from target code point. Skip this node and read
|
// The first code point is different from target code point. Skip this node and read
|
||||||
|
@ -47,26 +46,25 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Check following merged node code points.
|
// Check following merged node code points.
|
||||||
const int nodeCodePointCount = ptNodeParams.getCodePointCount();
|
const size_t nodeCodePointCount = ptNodeParams.getCodePointArrayView().size();
|
||||||
for (int j = 1; j < nodeCodePointCount; ++j) {
|
for (size_t j = 1; j < nodeCodePointCount; ++j) {
|
||||||
const int nextIndex = matchedCodePointCount + j;
|
const size_t nextIndex = matchedCodePointCount + j;
|
||||||
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
if (nextIndex >= wordCodePoints.size()
|
||||||
wordCodePoints[matchedCodePointCount + j])) {
|
|| !readingHelper->isMatchedCodePoint(ptNodeParams, j,
|
||||||
|
wordCodePoints[matchedCodePointCount + j])) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
|
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
|
||||||
wordCodePoints + matchedCodePointCount,
|
wordCodePoints.skip(matchedCodePointCount));
|
||||||
codePointCount - matchedCodePointCount);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// All characters are matched.
|
// All characters are matched.
|
||||||
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
if (wordCodePoints.size() == readingHelper->getTotalCodePointCount(ptNodeParams)) {
|
||||||
return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
|
return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
|
||||||
}
|
}
|
||||||
if (!ptNodeParams.hasChildren()) {
|
if (!ptNodeParams.hasChildren()) {
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
|
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
|
||||||
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
|
wordCodePoints.skip(readingHelper->getTotalCodePointCount(ptNodeParams)));
|
||||||
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
|
|
||||||
}
|
}
|
||||||
// Advance to the children nodes.
|
// Advance to the children nodes.
|
||||||
parentPos = ptNodeParams.getHeadPos();
|
parentPos = ptNodeParams.getHeadPos();
|
||||||
|
@ -79,9 +77,8 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
|
||||||
int pos = readingHelper->getPosOfLastForwardLinkField();
|
int pos = readingHelper->getPosOfLastForwardLinkField();
|
||||||
*outAddedNewUnigram = true;
|
*outAddedNewUnigram = true;
|
||||||
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
return createAndInsertNodeIntoPtNodeArray(parentPos,
|
||||||
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
|
wordCodePoints.skip(readingHelper->getPrevTotalCodePointCount()), unigramProperty,
|
||||||
codePointCount - readingHelper->getPrevTotalCodePointCount(),
|
&pos);
|
||||||
unigramProperty, &pos);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
|
bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
|
||||||
|
@ -120,23 +117,21 @@ bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWord
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
|
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
|
||||||
const int *const targetCodePoints, const int targetCodePointCount,
|
const CodePointArrayView targetCodePoints, const int shortcutProbability) {
|
||||||
const int shortcutProbability) {
|
|
||||||
const PtNodeParams ptNodeParams(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos));
|
const PtNodeParams ptNodeParams(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos));
|
||||||
return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints, targetCodePointCount,
|
return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints.data(),
|
||||||
shortcutProbability);
|
targetCodePoints.size(), shortcutProbability);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
const int *const nodeCodePoints, const int nodeCodePointCount,
|
const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty,
|
||||||
const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) {
|
int *const forwardLinkFieldPos) {
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
|
||||||
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
newPtNodeArrayPos, forwardLinkFieldPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
|
return createNewPtNodeArrayWithAChildPtNode(parentPos, ptNodeCodePoints, unigramProperty);
|
||||||
unigramProperty);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
|
bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
|
||||||
|
@ -153,8 +148,7 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
|
||||||
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
|
||||||
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||||
true /* isTerminal */, originalPtNodeParams->getParentPos(),
|
true /* isTerminal */, originalPtNodeParams->getParentPos(),
|
||||||
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(),
|
originalPtNodeParams->getCodePointArrayView(), unigramProperty->getProbability()));
|
||||||
unigramProperty->getProbability()));
|
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
unigramProperty, &writingPos)) {
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -168,17 +162,17 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
|
||||||
const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
|
const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
|
||||||
const int *const codePoints, const int codePointCount) {
|
const CodePointArrayView codePoints) {
|
||||||
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
const int newPtNodeArrayPos = mBuffer->getTailPosition();
|
||||||
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
|
||||||
codePointCount, unigramProperty);
|
unigramProperty);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
|
const int parentPtNodePos, const CodePointArrayView ptNodeCodePoints,
|
||||||
const UnigramProperty *const unigramProperty) {
|
const UnigramProperty *const unigramProperty) {
|
||||||
int writingPos = mBuffer->getTailPosition();
|
int writingPos = mBuffer->getTailPosition();
|
||||||
if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
|
||||||
|
@ -187,8 +181,7 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
}
|
}
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
|
||||||
parentPtNodePos, nodeCodePointCount, nodeCodePoints,
|
parentPtNodePos, ptNodeCodePoints, unigramProperty->getProbability()));
|
||||||
unigramProperty->getProbability()));
|
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
unigramProperty, &writingPos)) {
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -202,9 +195,9 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
|
||||||
|
|
||||||
// Returns whether the dictionary updating was succeeded or not.
|
// Returns whether the dictionary updating was succeeded or not.
|
||||||
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
const PtNodeParams *const reallocatingPtNodeParams, const size_t overlappingCodePointCount,
|
||||||
const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
|
const UnigramProperty *const unigramProperty,
|
||||||
const int newNodeCodePointCount) {
|
const CodePointArrayView newPtNodeCodePoints) {
|
||||||
// When addsExtraChild is true, split the reallocating PtNode and add new child.
|
// When addsExtraChild is true, split the reallocating PtNode and add new child.
|
||||||
// Reallocating PtNode: abcde, newNode: abcxy.
|
// Reallocating PtNode: abcde, newNode: abcxy.
|
||||||
// abc (1st, not terminal) __ de (2nd)
|
// abc (1st, not terminal) __ de (2nd)
|
||||||
|
@ -212,16 +205,18 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
// Otherwise, this method makes 1st part terminal and write information in unigramProperty.
|
// Otherwise, this method makes 1st part terminal and write information in unigramProperty.
|
||||||
// Reallocating PtNode: abcde, newNode: abc.
|
// Reallocating PtNode: abcde, newNode: abc.
|
||||||
// abc (1st, terminal) __ de (2nd)
|
// abc (1st, terminal) __ de (2nd)
|
||||||
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount;
|
const bool addsExtraChild = newPtNodeCodePoints.size() > overlappingCodePointCount;
|
||||||
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
|
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
|
||||||
int writingPos = firstPartOfReallocatedPtNodePos;
|
int writingPos = firstPartOfReallocatedPtNodePos;
|
||||||
// Write the 1st part of the reallocating node. The children position will be updated later
|
// Write the 1st part of the reallocating node. The children position will be updated later
|
||||||
// with actual children position.
|
// with actual children position.
|
||||||
|
const CodePointArrayView firstPtNodeCodePoints =
|
||||||
|
reallocatingPtNodeParams->getCodePointArrayView().limit(overlappingCodePointCount);
|
||||||
if (addsExtraChild) {
|
if (addsExtraChild) {
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
|
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
|
||||||
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
|
reallocatingPtNodeParams->getParentPos(), firstPtNodeCodePoints,
|
||||||
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
|
NOT_A_PROBABILITY));
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -229,8 +224,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
|
||||||
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||||
true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
|
true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
|
||||||
overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(),
|
firstPtNodeCodePoints, unigramProperty->getProbability()));
|
||||||
unigramProperty->getProbability()));
|
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
|
||||||
unigramProperty, &writingPos)) {
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -248,8 +242,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
|
||||||
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
|
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
|
||||||
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
|
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
|
||||||
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
|
reallocatingPtNodeParams->getCodePointArrayView().skip(overlappingCodePointCount),
|
||||||
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
|
|
||||||
reallocatingPtNodeParams->getProbability()));
|
reallocatingPtNodeParams->getProbability()));
|
||||||
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
|
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -258,8 +251,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
|
||||||
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
|
||||||
true /* isTerminal */, firstPartOfReallocatedPtNodePos,
|
true /* isTerminal */, firstPartOfReallocatedPtNodePos,
|
||||||
newNodeCodePointCount - overlappingCodePointCount,
|
newPtNodeCodePoints.skip(overlappingCodePointCount),
|
||||||
newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability()));
|
unigramProperty->getProbability()));
|
||||||
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
|
||||||
unigramProperty, &writingPos)) {
|
unigramProperty, &writingPos)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -282,26 +275,24 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
|
||||||
}
|
}
|
||||||
|
|
||||||
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
|
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
|
||||||
const PtNodeParams *const originalPtNodeParams,
|
const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos,
|
const bool isBlacklisted, const bool isTerminal, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability) const {
|
const CodePointArrayView codePoints, const int probability) const {
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
|
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
|
||||||
false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
|
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
|
||||||
CHILDREN_POSITION_FIELD_SIZE);
|
CHILDREN_POSITION_FIELD_SIZE);
|
||||||
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
|
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePoints, probability);
|
||||||
probability);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(
|
const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(const bool isNotAWord,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
|
const bool isBlacklisted, const bool isTerminal, const int parentPos,
|
||||||
const int parentPos, const int codePointCount, const int *const codePoints,
|
const CodePointArrayView codePoints, const int probability) const {
|
||||||
const int probability) const {
|
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
|
||||||
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
|
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
|
||||||
false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
|
false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
|
||||||
CHILDREN_POSITION_FIELD_SIZE);
|
CHILDREN_POSITION_FIELD_SIZE);
|
||||||
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
|
return PtNodeParams(flags, parentPos, codePoints, probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
|
|
|
@ -40,8 +40,8 @@ class DynamicPtUpdatingHelper {
|
||||||
|
|
||||||
// Add a word to the dictionary. If the word already exists, update the probability.
|
// Add a word to the dictionary. If the word already exists, update the probability.
|
||||||
bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
|
bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
|
||||||
const int *const wordCodePoints, const int codePointCount,
|
const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty,
|
||||||
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
// TODO: Remove after stopping supporting v402.
|
// TODO: Remove after stopping supporting v402.
|
||||||
// Add an n-gram entry.
|
// Add an n-gram entry.
|
||||||
|
@ -53,8 +53,8 @@ class DynamicPtUpdatingHelper {
|
||||||
bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
|
bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
|
||||||
|
|
||||||
// Add a shortcut target.
|
// Add a shortcut target.
|
||||||
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints,
|
bool addShortcutTarget(const int wordPos, const CodePointArrayView targetCodePoints,
|
||||||
const int targetCodePointCount, const int shortcutProbability);
|
const int shortcutProbability);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
|
||||||
|
@ -65,33 +65,32 @@ class DynamicPtUpdatingHelper {
|
||||||
const PtNodeReader *const mPtNodeReader;
|
const PtNodeReader *const mPtNodeReader;
|
||||||
PtNodeWriter *const mPtNodeWriter;
|
PtNodeWriter *const mPtNodeWriter;
|
||||||
|
|
||||||
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
|
bool createAndInsertNodeIntoPtNodeArray(const int parentPos,
|
||||||
const int nodeCodePointCount, const UnigramProperty *const unigramProperty,
|
const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty,
|
||||||
int *const forwardLinkFieldPos);
|
int *const forwardLinkFieldPos);
|
||||||
|
|
||||||
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
|
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
|
||||||
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
|
||||||
|
|
||||||
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
|
||||||
const UnigramProperty *const unigramProperty, const int *const codePoints,
|
const UnigramProperty *const unigramProperty,
|
||||||
const int codePointCount);
|
const CodePointArrayView remainingCodePoints);
|
||||||
|
|
||||||
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
|
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos,
|
||||||
const int nodeCodePointCount, const UnigramProperty *const unigramProperty);
|
const CodePointArrayView ptNodeCodePoints,
|
||||||
|
const UnigramProperty *const unigramProperty);
|
||||||
|
|
||||||
bool reallocatePtNodeAndAddNewPtNodes(
|
bool reallocatePtNodeAndAddNewPtNodes(const PtNodeParams *const reallocatingPtNodeParams,
|
||||||
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
|
const size_t overlappingCodePointCount, const UnigramProperty *const unigramProperty,
|
||||||
const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints,
|
const CodePointArrayView newPtNodeCodePoints);
|
||||||
const int newNodeCodePointCount);
|
|
||||||
|
|
||||||
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
|
||||||
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
|
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
|
||||||
const int parentPos, const int codePointCount,
|
const int parentPos, const CodePointArrayView codePoints, const int probability) const;
|
||||||
const int *const codePoints, const int probability) const;
|
|
||||||
|
|
||||||
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
|
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
|
||||||
const bool isTerminal, const int parentPos,
|
const bool isTerminal, const int parentPos, const CodePointArrayView codePoints,
|
||||||
const int codePointCount, const int *const codePoints, const int probability) const;
|
const int probability) const;
|
||||||
};
|
};
|
||||||
} // namespace latinime
|
} // namespace latinime
|
||||||
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
|
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */
|
||||||
|
|
|
@ -89,9 +89,9 @@ class PtNodeParams {
|
||||||
// Construct new params by updating existing PtNode params.
|
// Construct new params by updating existing PtNode params.
|
||||||
PtNodeParams(const PtNodeParams *const ptNodeParams,
|
PtNodeParams(const PtNodeParams *const ptNodeParams,
|
||||||
const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability)
|
const CodePointArrayView codePoints, const int probability)
|
||||||
: mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
|
: mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
|
||||||
mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(),
|
mParentPos(parentPos), mCodePointCount(codePoints.size()), mCodePoints(),
|
||||||
mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
|
mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
|
||||||
mTerminalId(ptNodeParams->getTerminalId()),
|
mTerminalId(ptNodeParams->getTerminalId()),
|
||||||
mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
|
mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
|
||||||
|
@ -102,20 +102,20 @@ class PtNodeParams {
|
||||||
mShortcutPos(ptNodeParams->getShortcutPos()),
|
mShortcutPos(ptNodeParams->getShortcutPos()),
|
||||||
mBigramPos(ptNodeParams->getBigramsPos()),
|
mBigramPos(ptNodeParams->getBigramsPos()),
|
||||||
mSiblingPos(ptNodeParams->getSiblingNodePos()) {
|
mSiblingPos(ptNodeParams->getSiblingNodePos()) {
|
||||||
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
|
||||||
const int codePointCount, const int *const codePoints, const int probability)
|
const CodePointArrayView codePoints, const int probability)
|
||||||
: mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
|
: mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
|
||||||
mCodePointCount(codePointCount), mCodePoints(),
|
mCodePointCount(codePoints.size()), mCodePoints(),
|
||||||
mTerminalIdFieldPos(NOT_A_DICT_POS),
|
mTerminalIdFieldPos(NOT_A_DICT_POS),
|
||||||
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
|
||||||
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
|
||||||
mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
|
mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
|
||||||
mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
|
||||||
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
|
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
|
||||||
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount);
|
memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
AK_FORCE_INLINE bool isValid() const {
|
AK_FORCE_INLINE bool isValid() const {
|
||||||
|
|
|
@ -227,8 +227,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
|
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
|
||||||
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(),
|
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
|
||||||
codePointArrayView.size(), unigramProperty, &addedNewUnigram)) {
|
&addedNewUnigram)) {
|
||||||
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
|
||||||
mUnigramCount++;
|
mUnigramCount++;
|
||||||
}
|
}
|
||||||
|
@ -243,8 +243,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
|
||||||
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
|
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
|
||||||
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
for (const auto &shortcut : unigramProperty->getShortcuts()) {
|
||||||
if (!mUpdatingHelper.addShortcutTarget(wordPos,
|
if (!mUpdatingHelper.addShortcutTarget(wordPos,
|
||||||
shortcut.getTargetCodePoints()->data(),
|
CodePointArrayView(*shortcut.getTargetCodePoints()),
|
||||||
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) {
|
shortcut.getProbability())) {
|
||||||
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
|
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
|
||||||
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
|
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
|
||||||
shortcut.getProbability());
|
shortcut.getProbability());
|
||||||
|
|
Loading…
Reference in New Issue