Merge "Support controlling some unigram flags in addUnigramWord."

This commit is contained in:
Keisuke Kuroyanagi 2013-12-04 11:43:46 +00:00 committed by Android (Google) Code Review
commit 5b0ca975c4
9 changed files with 108 additions and 53 deletions

View file

@ -301,11 +301,12 @@ static void latinime_BinaryDictionary_addUnigramWord(JNIEnv *env, jclass clazz,
int codePoints[wordLength]; int codePoints[wordLength];
env->GetIntArrayRegion(word, 0, wordLength, codePoints); env->GetIntArrayRegion(word, 0, wordLength, codePoints);
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0; jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
int shortcutTargetCodePoints[wordLength]; int shortcutTargetCodePoints[shortcutLength];
if (shortcutTarget) { if (shortcutTarget) {
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints); env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
} }
dictionary->addUnigramWord(codePoints, wordLength, probability, timestamp); dictionary->addUnigramWord(codePoints, wordLength, probability, shortcutTargetCodePoints,
shortcutLength, shortuctProbability, isNotAWord, isBlacklisted, timestamp);
} }
static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict, static void latinime_BinaryDictionary_addBigramWords(JNIEnv *env, jclass clazz, jlong dict,
@ -356,7 +357,6 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
jclass languageModelParamClass = env->GetObjectClass(languageModelParam); jclass languageModelParamClass = env->GetObjectClass(languageModelParam);
env->DeleteLocalRef(languageModelParam); env->DeleteLocalRef(languageModelParam);
// TODO: Support shortcut and flags.
jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I"); jfieldID word0FieldId = env->GetFieldID(languageModelParamClass, "mWord0", "[I");
jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I"); jfieldID word1FieldId = env->GetFieldID(languageModelParamClass, "mWord1", "[I");
jfieldID unigramProbabilityFieldId = jfieldID unigramProbabilityFieldId =
@ -365,6 +365,14 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetFieldID(languageModelParamClass, "mBigramProbability", "I"); env->GetFieldID(languageModelParamClass, "mBigramProbability", "I");
jfieldID timestampFieldId = jfieldID timestampFieldId =
env->GetFieldID(languageModelParamClass, "mTimestamp", "I"); env->GetFieldID(languageModelParamClass, "mTimestamp", "I");
jfieldID shortcutTargetFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutTarget", "[I");
jfieldID shortcutProbabilityFieldId =
env->GetFieldID(languageModelParamClass, "mShortcutProbability", "I");
jfieldID isNotAWordFieldId =
env->GetFieldID(languageModelParamClass, "mIsNotAWord", "Z");
jfieldID isBlacklistedFieldId =
env->GetFieldID(languageModelParamClass, "mIsBlacklisted", "Z");
env->DeleteLocalRef(languageModelParamClass); env->DeleteLocalRef(languageModelParamClass);
for (int i = startIndex; i < languageModelParamCount; ++i) { for (int i = startIndex; i < languageModelParamCount; ++i) {
@ -386,7 +394,19 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints); env->GetIntArrayRegion(word1, 0, word1Length, word1CodePoints);
jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId); jint unigramProbability = env->GetIntField(languageModelParam, unigramProbabilityFieldId);
jint timestamp = env->GetIntField(languageModelParam, timestampFieldId); jint timestamp = env->GetIntField(languageModelParam, timestampFieldId);
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability, timestamp); jboolean isNotAWord = env->GetBooleanField(languageModelParam, isNotAWordFieldId);
jboolean isBlacklisted = env->GetBooleanField(languageModelParam, isBlacklistedFieldId);
jintArray shortcutTarget = static_cast<jintArray>(
env->GetObjectField(languageModelParam, shortcutTargetFieldId));
jsize shortcutLength = shortcutTarget ? env->GetArrayLength(shortcutTarget) : 0;
int shortcutTargetCodePoints[shortcutLength];
if (shortcutTarget) {
env->GetIntArrayRegion(shortcutTarget, 0, shortcutLength, shortcutTargetCodePoints);
}
jint shortcutProbability = env->GetIntField(languageModelParam, shortcutProbabilityFieldId);
dictionary->addUnigramWord(word1CodePoints, word1Length, unigramProbability,
shortcutTargetCodePoints, shortcutLength, shortcutProbability,
isNotAWord, isBlacklisted, timestamp);
if (word0) { if (word0) {
jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId); jint bigramProbability = env->GetIntField(languageModelParam, bigramProbabilityFieldId);
dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length, dictionary->addBigramWords(word0CodePoints, word0Length, word1CodePoints, word1Length,
@ -397,6 +417,7 @@ static int latinime_BinaryDictionary_addMultipleDictionaryEntries(JNIEnv *env, j
} }
env->DeleteLocalRef(word0); env->DeleteLocalRef(word0);
env->DeleteLocalRef(word1); env->DeleteLocalRef(word1);
env->DeleteLocalRef(shortcutTarget);
env->DeleteLocalRef(languageModelParam); env->DeleteLocalRef(languageModelParam);
} }
return languageModelParamCount; return languageModelParamCount;

View file

@ -99,10 +99,13 @@ int Dictionary::getBigramProbability(const int *word0, int length0, const int *w
} }
void Dictionary::addUnigramWord(const int *const word, const int length, const int probability, void Dictionary::addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) { const int timestamp) {
TimeKeeper::setCurrentTime(); TimeKeeper::setCurrentTime();
mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability, mDictionaryStructureWithBufferPolicy.get()->addUnigramWord(word, length, probability,
timestamp); shortcutTargetCodePoints, shortcutLength, shortcutProbability, isNotAWord,
isBlacklisted, timestamp);
} }
void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1, void Dictionary::addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -73,6 +73,8 @@ class Dictionary {
int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const; int getBigramProbability(const int *word0, int length0, const int *word1, int length1) const;
void addUnigramWord(const int *const word, const int length, const int probability, void addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp); const int timestamp);
void addBigramWords(const int *const word0, const int length0, const int *const word1, void addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -67,7 +67,9 @@ class DictionaryStructureWithBufferPolicy {
// Returns whether the update succeeded or not. // Returns whether the update succeeded or not.
virtual bool addUnigramWord(const int *const word, const int length, virtual bool addUnigramWord(const int *const word, const int length,
const int probability, const int timestamp) = 0; const int probability, const int *const shortcutTargetCodePoints,
const int shortcutLength, const int shortcutProbability, const bool isNotAWord,
const bool isBlacklisted,const int timestamp) = 0;
// Returns whether the update succeeded or not. // Returns whether the update succeeded or not.
virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1, virtual bool addBigramWords(const int *const word0, const int length0, const int *const word1,

View file

@ -77,6 +77,8 @@ class PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
} }
bool addUnigramWord(const int *const word, const int length, const int probability, bool addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) { const int timestamp) {
// This method should not be called for non-updatable dictionary. // This method should not be called for non-updatable dictionary.
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");

View file

@ -30,7 +30,8 @@ const int DynamicPatriciaTrieUpdatingHelper::CHILDREN_POSITION_FIELD_SIZE = 3;
bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord( bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper, DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability, const int *const wordCodePoints, const int codePointCount, const int probability,
const int timestamp, bool *const outAddedNewUnigram) { const bool isNotAWord, const bool isBlacklisted, const int timestamp,
bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS; int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) { while (!readingHelper->isEnd()) {
const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams()); const PtNodeParams ptNodeParams(readingHelper->getPtNodeParams());
@ -52,18 +53,20 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j, if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(ptNodeParams, j,
wordCodePoints[matchedCodePointCount + j])) { wordCodePoints[matchedCodePointCount + j])) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, probability, timestamp, return reallocatePtNodeAndAddNewPtNodes(&ptNodeParams, j, isNotAWord, isBlacklisted,
wordCodePoints + matchedCodePointCount, probability, timestamp, wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount); codePointCount - matchedCodePointCount);
} }
} }
// All characters are matched. // All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) { if (codePointCount == readingHelper->getTotalCodePointCount(ptNodeParams)) {
return setPtNodeProbability(&ptNodeParams, probability, timestamp, outAddedNewUnigram); return setPtNodeProbability(&ptNodeParams, isNotAWord, isBlacklisted, probability,
timestamp, outAddedNewUnigram);
} }
if (!ptNodeParams.hasChildren()) { if (!ptNodeParams.hasChildren()) {
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams, probability, timestamp, return createChildrenPtNodeArrayAndAChildPtNode(&ptNodeParams,
isNotAWord, isBlacklisted, probability, timestamp,
wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams), wordCodePoints + readingHelper->getTotalCodePointCount(ptNodeParams),
codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams)); codePointCount - readingHelper->getTotalCodePointCount(ptNodeParams));
} }
@ -79,8 +82,8 @@ bool DynamicPatriciaTrieUpdatingHelper::addUnigramWord(
*outAddedNewUnigram = true; *outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos, return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(), wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(), probability, codePointCount - readingHelper->getPrevTotalCodePointCount(),
timestamp, &pos); isNotAWord, isBlacklisted, probability, timestamp, &pos);
} }
bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos, bool DynamicPatriciaTrieUpdatingHelper::addBigramWords(const int word0Pos, const int word1Pos,
@ -103,19 +106,21 @@ bool DynamicPatriciaTrieUpdatingHelper::removeBigramWords(const int word0Pos, co
} }
bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos, bool DynamicPatriciaTrieUpdatingHelper::createAndInsertNodeIntoPtNodeArray(const int parentPos,
const int *const nodeCodePoints, const int nodeCodePointCount, const int probability, const int *const nodeCodePoints, const int nodeCodePointCount,
const int timestamp, int *const forwardLinkFieldPos) { const bool isNotAWord, const bool isBlacklisted, const int probability,
const int timestamp, int *const forwardLinkFieldPos) {
const int newPtNodeArrayPos = mBuffer->getTailPosition(); const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer, if (!DynamicPatriciaTrieWritingUtils::writeForwardLinkPositionAndAdvancePosition(mBuffer,
newPtNodeArrayPos, forwardLinkFieldPos)) { newPtNodeArrayPos, forwardLinkFieldPos)) {
return false; return false;
} }
return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount, return createNewPtNodeArrayWithAChildPtNode(parentPos, nodeCodePoints, nodeCodePointCount,
probability, timestamp); isNotAWord, isBlacklisted, probability, timestamp);
} }
bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability( bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
const PtNodeParams *const originalPtNodeParams, const int probability, const int timestamp, const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const int probability, const int timestamp,
bool *const outAddedNewUnigram) { bool *const outAddedNewUnigram) {
if (originalPtNodeParams->isTerminal()) { if (originalPtNodeParams->isTerminal()) {
// Overwrites the probability. // Overwrites the probability.
@ -127,9 +132,9 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
const int movedPos = mBuffer->getTailPosition(); const int movedPos = mBuffer->getTailPosition();
int writingPos = movedPos; int writingPos = movedPos;
const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams, const PtNodeParams ptNodeParamsToWrite(getUpdatedPtNodeParams(originalPtNodeParams,
true /* isTerminal */, originalPtNodeParams->getParentPos(), isNotAWord, isBlacklisted, true /* isTerminal */,
originalPtNodeParams->getCodePointCount(), originalPtNodeParams->getCodePoints(), originalPtNodeParams->getParentPos(), originalPtNodeParams->getCodePointCount(),
probability)); originalPtNodeParams->getCodePoints(), probability));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
timestamp, &writingPos)) { timestamp, &writingPos)) {
return false; return false;
@ -142,25 +147,28 @@ bool DynamicPatriciaTrieUpdatingHelper::setPtNodeProbability(
} }
bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode( bool DynamicPatriciaTrieUpdatingHelper::createChildrenPtNodeArrayAndAChildPtNode(
const PtNodeParams *const parentPtNodeParams, const int probability, const int timestamp, const PtNodeParams *const parentPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const int probability, const int timestamp,
const int *const codePoints, const int codePointCount) { const int *const codePoints, const int codePointCount) {
const int newPtNodeArrayPos = mBuffer->getTailPosition(); const int newPtNodeArrayPos = mBuffer->getTailPosition();
if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) { if (!mPtNodeWriter->updateChildrenPosition(parentPtNodeParams, newPtNodeArrayPos)) {
return false; return false;
} }
return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints, return createNewPtNodeArrayWithAChildPtNode(parentPtNodeParams->getHeadPos(), codePoints,
codePointCount, probability, timestamp); codePointCount, isNotAWord, isBlacklisted, probability, timestamp);
} }
bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode( bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount, const int parentPtNodePos, const int *const nodeCodePoints, const int nodeCodePointCount,
const int probability, const int timestamp) { const bool isNotAWord, const bool isBlacklisted, const int probability,
const int timestamp) {
int writingPos = mBuffer->getTailPosition(); int writingPos = mBuffer->getTailPosition();
if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer, if (!DynamicPatriciaTrieWritingUtils::writePtNodeArraySizeAndAdvancePosition(mBuffer,
1 /* arraySize */, &writingPos)) { 1 /* arraySize */, &writingPos)) {
return false; return false;
} }
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */, const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */,
parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability)); parentPtNodePos, nodeCodePointCount, nodeCodePoints, probability));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, timestamp,
&writingPos)) { &writingPos)) {
@ -176,8 +184,8 @@ bool DynamicPatriciaTrieUpdatingHelper::createNewPtNodeArrayWithAChildPtNode(
// Returns whether the dictionary update succeeded or not. // Returns whether the dictionary update succeeded or not.
bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes( bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int timestamp, const int *const newNodeCodePoints, const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
const int newNodeCodePointCount) { const int timestamp, const int *const newNodeCodePoints, const int newNodeCodePointCount) {
// When addsExtraChild is true, split the reallocating PtNode and add new child. // When addsExtraChild is true, split the reallocating PtNode and add new child.
// Reallocating PtNode: abcde, newNode: abcxy. // Reallocating PtNode: abcde, newNode: abcxy.
// abc (1st, not terminal) __ de (2nd) // abc (1st, not terminal) __ de (2nd)
@ -191,14 +199,16 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 1st part of the reallocating node. The children position will be updated later // Write the 1st part of the reallocating node. The children position will be updated later
// with actual children position. // with actual children position.
if (addsExtraChild) { if (addsExtraChild) {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(false /* isTerminal */, const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
false /* isNotAWord */, false /* isBlacklisted */, false /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY)); reallocatingPtNodeParams->getCodePoints(), NOT_A_PROBABILITY));
if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) { if (!mPtNodeWriter->writePtNodeAndAdvancePosition(&ptNodeParamsToWrite, &writingPos)) {
return false; return false;
} }
} else { } else {
const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(true /* isTerminal */, const PtNodeParams ptNodeParamsToWrite(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */,
reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount, reallocatingPtNodeParams->getParentPos(), overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode)); reallocatingPtNodeParams->getCodePoints(), probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&ptNodeParamsToWrite,
@ -216,6 +226,7 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
// Write the 2nd part of the reallocating node. // Write the 2nd part of the reallocating node.
const int secondPartOfReallocatedPtNodePos = writingPos; const int secondPartOfReallocatedPtNodePos = writingPos;
const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams, const PtNodeParams childPartPtNodeParams(getUpdatedPtNodeParams(reallocatingPtNodeParams,
reallocatingPtNodeParams->isNotAWord(), reallocatingPtNodeParams->isBlacklisted(),
reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos, reallocatingPtNodeParams->isTerminal(), firstPartOfReallocatedPtNodePos,
reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount, reallocatingPtNodeParams->getCodePointCount() - overlappingCodePointCount,
reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount, reallocatingPtNodeParams->getCodePoints() + overlappingCodePointCount,
@ -224,7 +235,8 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
return false; return false;
} }
if (addsExtraChild) { if (addsExtraChild) {
const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(true /* isTerminal */, const PtNodeParams extraChildPtNodeParams(getPtNodeParamsForNewPtNode(
isNotAWord, isBlacklisted, true /* isTerminal */,
firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount, firstPartOfReallocatedPtNodePos, newNodeCodePointCount - overlappingCodePointCount,
newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode)); newNodeCodePoints + overlappingCodePointCount, probabilityOfNewPtNode));
if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams, if (!mPtNodeWriter->writeNewTerminalPtNodeAndAdvancePosition(&extraChildPtNodeParams,
@ -249,23 +261,25 @@ bool DynamicPatriciaTrieUpdatingHelper::reallocatePtNodeAndAddNewPtNodes(
} }
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams( const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getUpdatedPtNodeParams(
const PtNodeParams *const originalPtNodeParams, const bool isTerminal, const int parentPos, const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const bool isBlacklisted, const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const { const int codePointCount, const int *const codePoints, const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
originalPtNodeParams->isBlacklisted(), originalPtNodeParams->isNotAWord(), isTerminal, isBlacklisted, isNotAWord, isTerminal, originalPtNodeParams->hasShortcutTargets(),
originalPtNodeParams->hasShortcutTargets(), originalPtNodeParams->hasBigrams(), originalPtNodeParams->hasBigrams(), codePointCount > 1 /* hasMultipleChars */,
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints, return PtNodeParams(originalPtNodeParams, flags, parentPos, codePointCount, codePoints,
probability); probability);
} }
const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode( const PtNodeParams DynamicPatriciaTrieUpdatingHelper::getPtNodeParamsForNewPtNode(
const bool isTerminal, const int parentPos, const int codePointCount, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
const int *const codePoints, const int probability) const { const int parentPos, const int codePointCount, const int *const codePoints,
const int probability) const {
const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags( const PatriciaTrieReadingUtils::NodeFlags flags = PatriciaTrieReadingUtils::createAndGetFlags(
false /* isBlacklisted */, false /* isNotAWord */, isTerminal, isBlacklisted, isNotAWord, isTerminal, false /* hasShortcutTargets */,
false /* hasShortcutTargets */, false /* hasBigrams */, false /* hasBigrams */, codePointCount > 1 /* hasMultipleChars */,
codePointCount > 1 /* hasMultipleChars */, CHILDREN_POSITION_FIELD_SIZE); CHILDREN_POSITION_FIELD_SIZE);
return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability); return PtNodeParams(flags, parentPos, codePointCount, codePoints, probability);
} }

View file

@ -42,7 +42,8 @@ class DynamicPatriciaTrieUpdatingHelper {
// Add a word to the dictionary. If the word already exists, update the probability. // Add a word to the dictionary. If the word already exists, update the probability.
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper, bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
const int *const wordCodePoints, const int codePointCount, const int probability, const int *const wordCodePoints, const int codePointCount, const int probability,
const int timestamp, bool *const outAddedNewUnigram); const bool isNotAWord, const bool isBlacklisted, const int timestamp,
bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos. // Add a bigram relation from word0Pos to word1Pos.
bool addBigramWords(const int word0Pos, const int word1Pos, const int probability, bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
@ -61,29 +62,34 @@ class DynamicPatriciaTrieUpdatingHelper {
PtNodeWriter *const mPtNodeWriter; PtNodeWriter *const mPtNodeWriter;
bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints, bool createAndInsertNodeIntoPtNodeArray(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, const int timestamp, const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
int *const forwardLinkFieldPos); const int probability, const int timestamp, int *const forwardLinkFieldPos);
bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const int probability, bool setPtNodeProbability(const PtNodeParams *const originalPtNodeParams, const bool isNotAWord,
const int timestamp, bool *const outAddedNewUnigram); const bool isBlacklisted, const int probability, const int timestamp,
bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams, bool createChildrenPtNodeArrayAndAChildPtNode(const PtNodeParams *const parentPtNodeParams,
const int probability, const int timestamp, const int *const codePoints, const bool isNotAWord, const bool isBlacklisted, const int probability,
const int codePointCount); const int timestamp, const int *const codePoints, const int codePointCount);
bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints, bool createNewPtNodeArrayWithAChildPtNode(const int parentPos, const int *const nodeCodePoints,
const int nodeCodePointCount, const int probability, const int timestamp); const int nodeCodePointCount, const bool isNotAWord, const bool isBlacklisted,
const int probability, const int timestamp);
bool reallocatePtNodeAndAddNewPtNodes( bool reallocatePtNodeAndAddNewPtNodes(
const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount, const PtNodeParams *const reallocatingPtNodeParams, const int overlappingCodePointCount,
const int probabilityOfNewPtNode, const int timestamp, const bool isNotAWord, const bool isBlacklisted, const int probabilityOfNewPtNode,
const int *const newNodeCodePoints, const int newNodeCodePointCount); const int timestamp, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams, const PtNodeParams getUpdatedPtNodeParams(const PtNodeParams *const originalPtNodeParams,
const bool isTerminal, const int parentPos, const int codePointCount, const bool isNotAWord, const bool isBlacklisted, const bool isTerminal,
const int parentPos, const int codePointCount,
const int *const codePoints, const int probability) const; const int *const codePoints, const int probability) const;
const PtNodeParams getPtNodeParamsForNewPtNode(const bool isTerminal, const int parentPos, const PtNodeParams getPtNodeParamsForNewPtNode(const bool isNotAWord, const bool isBlacklisted,
const bool isTerminal, const int parentPos,
const int codePointCount, const int *const codePoints, const int probability) const; const int codePointCount, const int *const codePoints, const int probability) const;
}; };
} // namespace latinime } // namespace latinime

View file

@ -137,7 +137,9 @@ int Ver4PatriciaTriePolicy::getBigramsPositionOfPtNode(const int ptNodePos) cons
} }
bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length, bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int length,
const int probability, const int timestamp) { const int probability, const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp) {
if (!mBuffers.get()->isUpdatable()) { if (!mBuffers.get()->isUpdatable()) {
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary."); AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false; return false;
@ -150,8 +152,9 @@ bool Ver4PatriciaTriePolicy::addUnigramWord(const int *const word, const int len
DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader); DynamicPatriciaTrieReadingHelper readingHelper(mDictBuffer, &mNodeReader);
readingHelper.initWithPtNodeArrayPos(getRootPosition()); readingHelper.initWithPtNodeArrayPos(getRootPosition());
bool addedNewUnigram = false; bool addedNewUnigram = false;
if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, timestamp, // TODO: Add shortcut.
&addedNewUnigram)) { if (mUpdatingHelper.addUnigramWord(&readingHelper, word, length, probability, isNotAWord,
isBlacklisted, timestamp, &addedNewUnigram)) {
if (addedNewUnigram) { if (addedNewUnigram) {
mUnigramCount++; mUnigramCount++;
} }

View file

@ -88,6 +88,8 @@ class Ver4PatriciaTriePolicy : public DictionaryStructureWithBufferPolicy {
} }
bool addUnigramWord(const int *const word, const int length, const int probability, bool addUnigramWord(const int *const word, const int length, const int probability,
const int *const shortcutTargetCodePoints, const int shortcutLength,
const int shortcutProbability, const bool isNotAWord, const bool isBlacklisted,
const int timestamp); const int timestamp);
bool addBigramWords(const int *const word0, const int length0, const int *const word1, bool addBigramWords(const int *const word0, const int length0, const int *const word1,