Use IntArrayView in DynamicPtUpdatingHelper.

Change-Id: Ifa185eba7dd9abee1b35e49fe559be1042ca63d7
main
Keisuke Kuroyanagi 2014-09-17 19:49:26 +09:00
parent 7542b8d83d
commit 89a074fade
7 changed files with 85 additions and 95 deletions

View File

@ -268,8 +268,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
return false; return false;
} }
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd); const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(), if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
codePointArrayView.size(), unigramProperty, &addedNewUnigram)) { &addedNewUnigram)) {
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
mUnigramCount++; mUnigramCount++;
} }
@ -283,8 +283,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
} }
for (const auto &shortcut : unigramProperty->getShortcuts()) { for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (!mUpdatingHelper.addShortcutTarget(wordPos, if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(), CodePointArrayView(*shortcut.getTargetCodePoints()),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) { shortcut.getProbability())) {
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, " AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability()); shortcut.getProbability());

View File

@ -218,9 +218,9 @@ int DynamicPtReadingHelper::getCodePointsAndProbabilityAndReturnCodePointCount(
} }
int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord, int DynamicPtReadingHelper::getTerminalPtNodePositionOfWord(const int *const inWord,
const int length, const bool forceLowerCaseSearch) { const size_t length, const bool forceLowerCaseSearch) {
int searchCodePoints[length]; int searchCodePoints[length];
for (int i = 0; i < length; ++i) { for (size_t i = 0; i < length; ++i) {
searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i]; searchCodePoints[i] = forceLowerCaseSearch ? CharUtils::toLowerCase(inWord[i]) : inWord[i];
} }
while (!isEnd()) { while (!isEnd()) {

View File

@ -138,12 +138,12 @@ class DynamicPtReadingHelper {
} }
// Return code point count exclude the last read node's code points. // Return code point count exclude the last read node's code points.
AK_FORCE_INLINE int getPrevTotalCodePointCount() const { AK_FORCE_INLINE size_t getPrevTotalCodePointCount() const {
return mReadingState.mTotalCodePointCountSinceInitialization; return mReadingState.mTotalCodePointCountSinceInitialization;
} }
// Return code point count include the last read node's code points. // Return code point count include the last read node's code points.
AK_FORCE_INLINE int getTotalCodePointCount(const PtNodeParams &ptNodeParams) const { AK_FORCE_INLINE size_t getTotalCodePointCount(const PtNodeParams &ptNodeParams) const {
return mReadingState.mTotalCodePointCountSinceInitialization return mReadingState.mTotalCodePointCountSinceInitialization
+ ptNodeParams.getCodePointCount(); + ptNodeParams.getCodePointCount();
} }
@ -214,7 +214,7 @@ class DynamicPtReadingHelper {
int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount, int getCodePointsAndProbabilityAndReturnCodePointCount(const int maxCodePointCount,
int *const outCodePoints, int *const outUnigramProbability); int *const outCodePoints, int *const outUnigramProbability);
int getTerminalPtNodePositionOfWord(const int *const inWord, const int length, int getTerminalPtNodePositionOfWord(const int *const inWord, const size_t length,
const bool forceLowerCaseSearch); const bool forceLowerCaseSearch);
private: private:
@ -234,7 +234,7 @@ class DynamicPtReadingHelper {
int mPos; int mPos;
// Remaining node count in the current array. // Remaining node count in the current array.
int mRemainingPtNodeCountInThisArray; int mRemainingPtNodeCountInThisArray;
int mTotalCodePointCountSinceInitialization; size_t mTotalCodePointCountSinceInitialization;
// Counter of PtNodes used to avoid infinite loops caused by broken or malicious links. // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links.
int mTotalPtNodeIndexInThisArrayChain; int mTotalPtNodeIndexInThisArrayChain;
// Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty

View File

@ -28,17 +28,16 @@ namespace latinime {
const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3; const int DynamicPtUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
bool DynamicPtUpdatingHelper::addUnigramWord( bool DynamicPtUpdatingHelper::addUnigramWord(DynamicPtReadingHelper *const readingHelper,
DynamicPtReadingHelper *const readingHelper, const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty,
const int *const wordCodePoints, const int codePointCount, bool *const outAddedNewUnigram) {
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS; int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) { while (!readingHelper->isEnd()) {
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
if (!ptNodeParams.isValid()) { if (!ptNodeParams.isValid()) {
break; break;
} }
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount(); const size_t matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */, if (!readingHelper->isMatchedCodePoint(ptNodeParams, 0 /* index */,
wordCodePoints[matchedCodePointCount])) { wordCodePoints[matchedCodePointCount])) {
// The first code point is different from target code point. Skip this node and read // The first code point is different from target code point. Skip this node and read
@ -47,26 +46,25 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
continue; continue;
} }
// Check following merged node code points. // Check following merged node code points.
const int nodeCodePointCount = ptNodeParams.getCodePointCount(); const size_t nodeCodePointCount = ptNodeParams.getCodePointArrayView().size();
for (int j = 1; j < nodeCodePointCount; ++j) { for (size_t j = 1; j < nodeCodePointCount; ++j) {
const int nextIndex = matchedCodePointCount + j; const size_t nextIndex = matchedCodePointCount + j;
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, if (nextIndex >= wordCodePoints.size()
|| !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) { wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty, return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, unigramProperty,
wordCodePoints + matchedCodePointCount, wordCodePoints.skip(matchedCodePointCount));
codePointCount - matchedCodePointCount);
} }
} }
// All characters are matched. // All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { if (wordCodePoints.size() == readingHelper->getTotalCodePointCount(ptNodeParams)) {
return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram); return setPtNodeProbability(&ptNodeParams, unigramProperty, outAddedNewUnigram);
} }
if (!ptNodeParams.hasChildren()) { if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty, return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, unigramProperty,
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), wordCodePoints.skip(readingHelper->getTotalCodePointCount(ptNodeParams)));
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
} }
// Advance to the children nodes. // Advance to the children nodes.
parentPos = ptNodeParams.getHeadPos(); parentPos = ptNodeParams.getHeadPos();
@ -79,9 +77,8 @@ bool DynamicPtUpdatingHelper::addUnigramWord(
int pos = readingHelper->getPosOfLastForwardLinkField(); int pos = readingHelper->getPosOfLastForwardLinkField();
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos, return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(), wordCodePoints.skip(readingHelper->getPrevTotalCodePointCount()), unigramProperty,
codePointCount - readingHelper->getPrevTotalCodePointCount(), &pos);
unigramProperty, &pos);
} }
bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, bool DynamicPtUpdatingHelper::addNgramEntry(const PtNodePosArrayView prevWordsPtNodePos,
@ -120,23 +117,21 @@ bool DynamicPtUpdatingHelper::removeNgramEntry(const PtNodePosArrayView prevWord
} }
bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos, bool DynamicPtUpdatingHelper::addShortcutTarget(const int wordPos,
const int *const targetCodePoints, const int targetCodePointCount, const CodePointArrayView targetCodePoints, const int shortcutProbability) {
const int shortcutProbability) {
const PtNodeParams ptNodeParams(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos)); const PtNodeParams ptNodeParams(mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(wordPos));
return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints, targetCodePointCount, return mPtNodeWriter->addShortcutTarget(&ptNodeParams, targetCodePoints.data(),
shortcutProbability); targetCodePoints.size(), shortcutProbability);
} }
bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, bool DynamicPtUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount, const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty,
const UnigramProperty *const unigramProperty, int *const forwardLinkFieldPos) { int *const forwardLinkFieldPos) {
const int newPtNodeArrayPos = mBuffer->getTailPosition(); const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, if (!DynamicPtWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, forwardLinkFieldPos)) { newPtNodeArrayPos, forwardLinkFieldPos)) {
return false; return false;
} }
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, return createNewPtNodeArrayWithAChildPtNode(parentPos, ptNodeCodePoints, unigramProperty);
unigramProperty);
} }
bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
@ -153,8 +148,7 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
true /* isTerminal */, originalPtNodeParams->getParentPos(), true /* isTerminal */, originalPtNodeParams->getParentPos(),
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), originalPtNodeParams->getCodePointArrayView(), unigramProperty->getProbability()));
unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
unigramProperty, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
@ -168,17 +162,17 @@ bool DynamicPtUpdatingHelper::setPtNodeProbability(const PtNodeParams *const ori
bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( bool DynamicPtUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty, const PtNodeParams *const parentPtNodeParams, const UnigramProperty *const unigramProperty,
const int *const codePoints, const int codePointCount) { const CodePointArrayView codePoints) {
const int newPtNodeArrayPos = mBuffer->getTailPosition(); const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
return false; return false;
} }
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
codePointCount, unigramProperty); unigramProperty);
} }
bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, const int parentPtNodePos, const CodePointArrayView ptNodeCodePoints,
const UnigramProperty *const unigramProperty) { const UnigramProperty *const unigramProperty) {
int writingPos = mBuffer->getTailPosition(); int writingPos = mBuffer->getTailPosition();
if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, if (!DynamicPtWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
@ -187,8 +181,7 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
} }
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */, unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), true /* isTerminal */,
parentPtNodePos, nodeCodePointCount, nodeCodePoints, parentPtNodePos, ptNodeCodePoints, unigramProperty->getProbability()));
unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
unigramProperty, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
@ -202,9 +195,9 @@ bool DynamicPtUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary updating was succeeded or not. // Returns whether the dictionary updating was succeeded or not.
bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const PtNodeParams *const reallocatingPtNodeParams, const size_t overlappingCodePointCount,
const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, const UnigramProperty *const unigramProperty,
const int newNodeCodePointCount) { const CodePointArrayView newPtNodeCodePoints) {
// When addsExtraChild is true, split the reallocating PtNode and add new child. // When addsExtraChild is true, split the reallocating PtNode and add new child.
// Reallocating PtNode: abcde, newNode: abcxy. // Reallocating PtNode: abcde, newNode: abcxy.
// abc (1st, not terminal) __ de (2nd) // abc (1st, not terminal) __ de (2nd)
@ -212,16 +205,18 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Otherwise, this method makes 1st part terminal and write information in unigramProperty. // Otherwise, this method makes 1st part terminal and write information in unigramProperty.
// Reallocating PtNode: abcde, newNode: abc. // Reallocating PtNode: abcde, newNode: abc.
// abc (1st, terminal) __ de (2nd) // abc (1st, terminal) __ de (2nd)
const bool addsExtraChild = newNodeCodePointCount > overlappingCodePointCount; const bool addsExtraChild = newPtNodeCodePoints.size() > overlappingCodePointCount;
const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition(); const int firstPartOfReallocatedPtNodePos = mBuffer->getTailPosition();
int writingPos = firstPartOfReallocatedPtNodePos; int writingPos = firstPartOfReallocatedPtNodePos;
// Write the 1st part of the reallocating node. The children position will be updated later // Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position. // with actual children position.
const CodePointArrayView firstPtNodeCodePoints =
reallocatingPtNodeParams->getCodePointArrayView().limit(overlappingCodePointCount);
if (addsExtraChild) { if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */, false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getParentPos(), firstPtNodeCodePoints,
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY)); NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
return false; return false;
} }
@ -229,8 +224,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode( const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
true /* isTerminal */, reallocatingPtNodeParams->getParentPos(), true /* isTerminal */, reallocatingPtNodeParams->getParentPos(),
overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints(), firstPtNodeCodePoints, unigramProperty->getProbability()));
unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
unigramProperty, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
@ -248,8 +242,7 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(), reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos, reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, reallocatingPtNodeParams->getCodePointArrayView().skip(overlappingCodePointCount),
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
reallocatingPtNodeParams->getProbability())); reallocatingPtNodeParams->getProbability()));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) { if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&childPartPtNodeParams, &writingPos)) {
return false; return false;
@ -258,8 +251,8 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode( const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(), unigramProperty->isNotAWord(), unigramProperty->isBlacklisted(),
true /* isTerminal */, firstPartOfReallocatedPtNodePos, true /* isTerminal */, firstPartOfReallocatedPtNodePos,
newNodeCodePointCount - overlappingCodePointCount, newPtNodeCodePoints.skip(overlappingCodePointCount),
newNodeCodePoints + overlappingCodePointCount, unigramProperty->getProbability())); unigramProperty->getProbability()));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
unigramProperty, &writingPos)) { unigramProperty, &writingPos)) {
return false; return false;
@ -282,26 +275,24 @@ bool DynamicPtUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams( const PtNodeParams DynamicPtUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const int parentPos, const bool isBlacklisted, const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const { const CodePointArrayView codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE); CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, return PtNodeParams(originalPtNodeParams, flags, parentPos, codePoints, probability);
probability);
} }
const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode( const PtNodeParams DynamicPtUpdatingHelper::getPtNodeParamsForNewPtNode(const bool isNotAWord,
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const bool isBlacklisted, const bool isTerminal, const int parentPos,
const int parentPos, const int codePointCount, const int *const codePoints, const CodePointArrayView codePoints, const int probability) const {
const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */, isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */, false /* hasBigrams */, codePoints.size() > 1u /* hasMultipleChars */,
CHILDREN_POSITION_FIELD_SIZE); CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); return PtNodeParams(flags, parentPos, codePoints, probability);
} }
} // namespace latinime } // namespace latinime

View File

@ -40,8 +40,8 @@ class DynamicPtUpdatingHelper {
// Add a word to the dictionary. If the word already exists, update the probability. // Add a word to the dictionary. If the word already exists, update the probability.
bool addUnigramWord(DynamicPtReadingHelper *const readingHelper, bool addUnigramWord(DynamicPtReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const CodePointArrayView wordCodePoints, const UnigramProperty *const unigramProperty,
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); bool *const outAddedNewUnigram);
// TODO: Remove after stopping supporting v402. // TODO: Remove after stopping supporting v402.
// Add an n-gram entry. // Add an n-gram entry.
@ -53,8 +53,8 @@ class DynamicPtUpdatingHelper {
bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos); bool removeNgramEntry(const PtNodePosArrayView prevWordsPtNodePos, const int wordPos);
// Add a shortcut target. // Add a shortcut target.
bool addShortcutTarget(const int wordPos, const int *const targetCodePoints, bool addShortcutTarget(const int wordPos, const CodePointArrayView targetCodePoints,
const int targetCodePointCount, const int shortcutProbability); const int shortcutProbability);
private: private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper); DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPtUpdatingHelper);
@ -65,33 +65,32 @@ class DynamicPtUpdatingHelper {
const PtNodeReader *const mPtNodeReader; const PtNodeReader *const mPtNodeReader;
PtNodeWriter *const mPtNodeWriter; PtNodeWriter *const mPtNodeWriter;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int nodeCodePointCount, const UnigramProperty *const unigramProperty, const CodePointArrayView ptNodeCodePoints, const UnigramProperty *const unigramProperty,
int *const forwardLinkFieldPos); int *const forwardLinkFieldPos);
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams,
const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram); const UnigramProperty *const unigramProperty, bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
const UnigramProperty *const unigramProperty, const int *const codePoints, const UnigramProperty *const unigramProperty,
const int codePointCount); const CodePointArrayView remainingCodePoints);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, bool createNewPtNodeArrayWithAChildPtNode(const int parentPos,
const int nodeCodePointCount, const UnigramProperty *const unigramProperty); const CodePointArrayView ptNodeCodePoints,
const UnigramProperty *const unigramProperty);
bool reallocatePtNodeAndAddNewPtNodes( bool reallocatePtNodeAndAddNewPtNodes(const PtNodeParams *const reallocatingPtNodeParams,
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const size_t overlappingCodePointCount, const UnigramProperty *const unigramProperty,
const UnigramProperty *const unigramProperty, const int *const newNodeCodePoints, const CodePointArrayView newPtNodeCodePoints);
const int newNodeCodePointCount);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const bool isNotAWord, const bool isBlacklisted, const bool isTerminal, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
const int parentPos, const int codePointCount, const int parentPos, const CodePointArrayView codePoints, const int probability) const;
const int *const codePoints, const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted, const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
const bool isTerminal, const int parentPos, const bool isTerminal, const int parentPos, const CodePointArrayView codePoints,
const int codePointCount, const int *const codePoints, const int probability) const; const int probability) const;
}; };
} // namespace latinime } // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */ #endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_UPDATING_HELPER_H */

View File

@ -89,9 +89,9 @@ class PtNodeParams {
// Construct new params by updating existing PtNode params. // Construct new params by updating existing PtNode params.
PtNodeParams(const PtNodeParams *const ptNodeParams, PtNodeParams(const PtNodeParams *const ptNodeParams,
const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const CodePointArrayView codePoints, const int probability)
: mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true), : mHeadPos(ptNodeParams->getHeadPos()), mFlags(flags), mHasMovedFlag(true),
mParentPos(parentPos), mCodePointCount(codePointCount), mCodePoints(), mParentPos(parentPos), mCodePointCount(codePoints.size()), mCodePoints(),
mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()), mTerminalIdFieldPos(ptNodeParams->getTerminalIdFieldPos()),
mTerminalId(ptNodeParams->getTerminalId()), mTerminalId(ptNodeParams->getTerminalId()),
mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()), mProbabilityFieldPos(ptNodeParams->getProbabilityFieldPos()),
@ -102,20 +102,20 @@ class PtNodeParams {
mShortcutPos(ptNodeParams->getShortcutPos()), mShortcutPos(ptNodeParams->getShortcutPos()),
mBigramPos(ptNodeParams->getBigramsPos()), mBigramPos(ptNodeParams->getBigramsPos()),
mSiblingPos(ptNodeParams->getSiblingNodePos()) { mSiblingPos(ptNodeParams->getSiblingNodePos()) {
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
} }
PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos, PtNodeParams(const PatriciaTrieReadingUtils::NodeFlags flags, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const CodePointArrayView codePoints, const int probability)
: mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos), : mHeadPos(NOT_A_DICT_POS), mFlags(flags), mHasMovedFlag(true), mParentPos(parentPos),
mCodePointCount(codePointCount), mCodePoints(), mCodePointCount(codePoints.size()), mCodePoints(),
mTerminalIdFieldPos(NOT_A_DICT_POS), mTerminalIdFieldPos(NOT_A_DICT_POS),
mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID), mTerminalId(Ver4DictConstants::NOT_A_TERMINAL_ID),
mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability), mProbabilityFieldPos(NOT_A_DICT_POS), mProbability(probability),
mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS), mChildrenPosFieldPos(NOT_A_DICT_POS), mChildrenPos(NOT_A_DICT_POS),
mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS), mBigramLinkedNodePos(NOT_A_DICT_POS), mShortcutPos(NOT_A_DICT_POS),
mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) { mBigramPos(NOT_A_DICT_POS), mSiblingPos(NOT_A_DICT_POS) {
memcpy(mCodePoints, codePoints, sizeof(int) * mCodePointCount); memcpy(mCodePoints, codePoints.data(), sizeof(int) * mCodePointCount);
} }
AK_FORCE_INLINE bool isValid() const { AK_FORCE_INLINE bool isValid() const {

View File

@ -227,8 +227,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
return false; return false;
} }
const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd); const CodePointArrayView codePointArrayView(codePointsToAdd, codePointCountToAdd);
if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView.data(), if (mUpdatingHelper.addUnigramWord(&readingHelper, codePointArrayView, unigramProperty,
codePointArrayView.size(), unigramProperty, &addedNewUnigram)) { &addedNewUnigram)) {
if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) { if (addedNewUnigram && !unigramProperty->representsBeginningOfSentence()) {
mUnigramCount++; mUnigramCount++;
} }
@ -243,8 +243,8 @@ bool Ver4PatriciaTriePolicy::addUnigramEntry(const CodePointArrayView wordCodePo
mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId); mBuffers->getTerminalPositionLookupTable()->getTerminalPtNodePosition(wordId);
for (const auto &shortcut : unigramProperty->getShortcuts()) { for (const auto &shortcut : unigramProperty->getShortcuts()) {
if (!mUpdatingHelper.addShortcutTarget(wordPos, if (!mUpdatingHelper.addShortcutTarget(wordPos,
shortcut.getTargetCodePoints()->data(), CodePointArrayView(*shortcut.getTargetCodePoints()),
shortcut.getTargetCodePoints()->size(), shortcut.getProbability())) { shortcut.getProbability())) {
AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, " AKLOGE("Cannot add new shortcut target. PtNodePos: %d, length: %zd, "
"probability: %d", wordPos, shortcut.getTargetCodePoints()->size(), "probability: %d", wordPos, shortcut.getTargetCodePoints()->size(),
shortcut.getProbability()); shortcut.getProbability());